Reduce pytest prints (#117069)
* Custom pytest-shard plugin so the verbosity can be controlled (shards are also 1-indexed now, since 0-indexing is confusing).
* Normal runs (not keep-going) now always rerun each failed test up to 9 times (3 per process, across 3 processes). Previously only the entire test file was rerun 3 times, so if a test before yours segfaulted, you only got 2 tries.

Example of a quieter log: https://github.com/pytorch/pytorch/actions/runs/7481334046/job/20363147497 - "items in shard" is only printed once at the beginning, and the reruns just report how many tests were skipped.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/117069
Approved by: https://github.com/huydhn
Committed by: PyTorch MergeBot
Parent: e432b2e607
Commit: 2f89ef2300
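For orientation, a minimal usage sketch (not part of the commit) of the options the new plugin adds once test/conftest.py registers them; the file name test_example.py and the shard counts are only illustrative:

    # Run shard 2 of 3 from PyTorch's test/ directory, where conftest.py wires up
    # the custom plugin; "test_example.py" is a hypothetical file name.
    import pytest

    exit_code = pytest.main([
        "test_example.py",
        "--shard-id=2",    # shards are 1-indexed, so valid IDs are 1..num-shards
        "--num-shards=3",
        "--print-items",   # print the node ids collected into this shard once
    ])
    print("pytest exited with code", exit_code)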
@@ -162,11 +162,6 @@ pytest-xdist==3.3.1
 #Pinned versions:
 #test that import:

-pytest-shard==0.1.2
-#Description: plugin spliting up tests in pytest
-#Pinned versions:
-#test that import:
-
 pytest-flakefinder==1.1.0
 #Description: plugin for rerunning tests a fixed number of times in pytest
 #Pinned versions: 1.1.0
@@ -16,7 +16,6 @@ pytest==7.3.2
 pytest-xdist==3.3.1
 pytest-rerunfailures==10.3
 pytest-flakefinder==1.1.0
-pytest-shard==0.1.2
 scipy==1.10.1
 sympy==1.11.1
 unittest-xml-reporting<=3.2.0,>=2.0.0
.github/workflows/lint.yml (vendored, 2 changed lines)
@@ -225,7 +225,7 @@ jobs:
           cache: pip
       - name: Install dependencies
         run: |
-          pip install pytest-rerunfailures==11.1.* pytest-shard==0.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
+          pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
          pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
       - name: Run run_test.py (nonretryable)
         run: |
@@ -8,6 +8,8 @@ addopts =
     --capture=sys
     # don't suppress warnings, but don't shove them all to the end either
     -p no:warnings
+    # Use custom pytest shard located in test/pytest_shard_custom.py instead
+    -p no:pytest-shard
     # don't rewrite assertions (usually not a problem in CI due to differences in imports, see #95844)
     --assert=plain
 testpaths =
@@ -19,6 +19,7 @@ import copy
 import json
 import re
 from collections import defaultdict
+from pytest_shard_custom import PytestShardPlugin, pytest_addoptions as shard_addoptions

 # a lot of this file is copied from _pytest.junitxml and modified to get rerun info

@@ -84,6 +85,7 @@ def pytest_addoption(parser: Parser) -> None:
         "Emit XML for schema: one of legacy|xunit1|xunit2",
         default="xunit2",
     )
+    shard_addoptions(parser)


 def pytest_configure(config: Config) -> None:
@@ -105,6 +107,8 @@ def pytest_configure(config: Config) -> None:
         config.option.stepcurrent = config.getoption("stepcurrent_skip")
     if config.getoption("stepcurrent"):
         config.pluginmanager.register(StepcurrentPlugin(config), "stepcurrentplugin")
+    if config.getoption("num_shards"):
+        config.pluginmanager.register(PytestShardPlugin(config), "pytestshardplugin")


 def pytest_unconfigure(config: Config) -> None:
test/pytest_shard_custom.py (new file, 67 lines)
@@ -0,0 +1,67 @@
+"""
+Custom pytest shard plugin
+https://github.com/AdamGleave/pytest-shard/blob/64610a08dac6b0511b6d51cf895d0e1040d162ad/pytest_shard/pytest_shard.py#L1
+Modifications:
+* shards are now 1 indexed instead of 0 indexed
+* option for printing items in shard
+"""
+import hashlib
+
+from _pytest.config.argparsing import Parser
+
+
+def pytest_addoptions(parser: Parser):
+    """Add options to control sharding."""
+    group = parser.getgroup("shard")
+    group.addoption(
+        "--shard-id", dest="shard_id", type=int, default=1, help="Number of this shard."
+    )
+    group.addoption(
+        "--num-shards",
+        dest="num_shards",
+        type=int,
+        default=1,
+        help="Total number of shards.",
+    )
+    group.addoption(
+        "--print-items",
+        dest="print_items",
+        action="store_true",
+        default=False,
+        help="Print out the items being tested in this shard.",
+    )
+
+
+class PytestShardPlugin:
+    def __init__(self, config):
+        self.config = config
+
+    def pytest_report_collectionfinish(self, config, items) -> str:
+        """Log how many and which items are tested in this shard."""
+        msg = f"Running {len(items)} items in this shard"
+        if config.getoption("print_items"):
+            msg += ": " + ", ".join([item.nodeid for item in items])
+        return msg
+
+    def sha256hash(self, x: str) -> int:
+        return int.from_bytes(hashlib.sha256(x.encode()).digest(), "little")
+
+    def filter_items_by_shard(self, items, shard_id: int, num_shards: int):
+        """Computes `items` that should be tested in `shard_id` out of `num_shards` total shards."""
+        new_items = [
+            item
+            for item in items
+            if self.sha256hash(item.nodeid) % num_shards == shard_id - 1
+        ]
+        return new_items
+
+    def pytest_collection_modifyitems(self, config, items):
+        """Mutate the collection to consist of just items to be tested in this shard."""
+        shard_id = config.getoption("shard_id")
+        shard_total = config.getoption("num_shards")
+        if shard_id < 1 or shard_id > shard_total:
+            raise ValueError(
+                f"{shard_id} is not a valid shard ID out of {shard_total} total shards"
+            )
+
+        items[:] = self.filter_items_by_shard(items, shard_id, shard_total)
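A quick worked sketch (not from the PR) of the assignment rule above: a test belongs to the 1-indexed shard whose ID satisfies sha256(nodeid) % num_shards == shard_id - 1, so the split is deterministic across machines. The node ids below are made up.

    # Standalone illustration of PytestShardPlugin.filter_items_by_shard's bucketing.
    import hashlib

    def shard_of(nodeid: str, num_shards: int) -> int:
        # Same hash as the plugin: sha256 digest read as a little-endian integer.
        h = int.from_bytes(hashlib.sha256(nodeid.encode()).digest(), "little")
        return (h % num_shards) + 1  # convert the 0-based bucket to a 1-indexed shard ID

    # Hypothetical node ids, purely to show the bucketing.
    for nodeid in ["test_ops.py::test_add", "test_ops.py::test_mul", "test_nn.py::test_relu"]:
        print(f"{nodeid} -> shard {shard_of(nodeid, 3)} of 3")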
@@ -517,11 +517,11 @@ def run_test(
        else:
            unittest_args.extend(
                [
-                    f"--shard-id={test_module.shard - 1}",
+                    f"--shard-id={test_module.shard}",
                    f"--num-shards={test_module.num_shards}",
                ]
            )
-        stepcurrent_key = f"{test_file}_{test_module.shard - 1}_{os.urandom(8).hex()}"
+        stepcurrent_key = f"{test_file}_{test_module.shard}_{os.urandom(8).hex()}"

    if options.verbose:
        unittest_args.append(f'-{"v"*options.verbose}')  # in case of pytest
@@ -540,7 +540,6 @@ def run_test(
        unittest_args.extend(
            get_pytest_args(
                options,
-                stepcurrent_key,
                is_cpp_test=is_cpp_test,
                is_distributed_test=is_distributed_test,
            )
@@ -603,7 +602,7 @@ def run_test(
        os.close(log_fd)

    command = (launcher_cmd or []) + executable + argv
-    num_retries = 0 if "--subprocess" in command or RERUN_DISABLED_TESTS else 2
+    should_retry = "--subprocess" not in command and not RERUN_DISABLED_TESTS
    is_slow = "slow" in os.environ.get("TEST_CONFIG", "") or "slow" in os.environ.get(
        "BUILD_ENVRIONMENT", ""
    )
@@ -611,7 +610,7 @@ def run_test(
        THRESHOLD * 6
        if is_slow
        else THRESHOLD * 3
-        if num_retries > 0
+        if should_retry
        and isinstance(test_module, ShardedTest)
        and test_module.time is not None
        else None
@@ -623,12 +622,18 @@ def run_test(
        if IS_CI:
            output = stack.enter_context(open(log_path, "w"))

-        if options.continue_through_error and "--subprocess" not in command:
-            # I think subprocess is better handled by common_utils? but it's not working atm
-            ret_code, was_rerun = run_test_continue_through_error(
-                command, test_directory, env, timeout, stepcurrent_key, output
+        if should_retry:
+            ret_code, was_rerun = run_test_retries(
+                command,
+                test_directory,
+                env,
+                timeout,
+                stepcurrent_key,
+                output,
+                options.continue_through_error,
            )
        else:
+            command.append(f"--sc={stepcurrent_key}")
            ret_code, was_rerun = retry_shell(
                command,
                test_directory,
@@ -636,7 +641,6 @@ def run_test(
                stderr=output,
                env=env,
                timeout=timeout,
-                retries=num_retries,
            )

    # Pytest return code 5 means no test is collected. This is needed
@@ -655,23 +659,37 @@ def run_test(
    return ret_code


-def run_test_continue_through_error(
-    command, test_directory, env, timeout, stepcurrent_key, output
+def run_test_retries(
+    command,
+    test_directory,
+    env,
+    timeout,
+    stepcurrent_key,
+    output,
+    continue_through_error,
 ):
    # Run the test with -x to stop at first failure. Try again, skipping the
    # previously run tests, repeating this until there is a test that fails 3
-    # times (same number of rVetries we typically give). Then we skip that
-    # test, and keep going. Basically if the same test fails 3 times in a row,
-    # skip the test on the next run, but still fail in the end. I take advantage
-    # of the value saved in stepcurrent to keep track of the most recently run
-    # test (which is the one that failed if there was a failure).
+    # times (same number of rVetries we typically give).
+    #
+    # If continue through error is not set, then we fail fast.
+    #
+    # If continue through error is set, then we skip that test, and keep going.
+    # Basically if the same test fails 3 times in a row, skip the test on the
+    # next run, but still fail in the end. I take advantage of the value saved
+    # in stepcurrent to keep track of the most recently run test (which is the
+    # one that failed if there was a failure).

+    def print_to_file(s):
+        print(s, file=output, flush=True)
+
    num_failures = defaultdict(int)

+    print_items = ["--print-items"]
    sc_command = f"--sc={stepcurrent_key}"
    while True:
        ret_code = shell(
-            command + [sc_command],
+            command + [sc_command] + print_items,
            test_directory,
            stdout=output,
            stderr=output,
@@ -682,11 +700,7 @@ def run_test_continue_through_error(
        if ret_code == 0:
            break  # Got to the end of the test suite successfully
        signal_name = f" ({SIGNALS_TO_NAMES_DICT[-ret_code]})" if ret_code < 0 else ""
-        print(
-            f"Got exit code {ret_code}{signal_name}, retrying...",
-            file=output,
-            flush=True,
-        )
+        print_to_file(f"Got exit code {ret_code}{signal_name}")

        # Read what just failed
        with open(
@@ -696,25 +710,24 @@ def run_test_continue_through_error(

        num_failures[current_failure] += 1
        if num_failures[current_failure] >= 3:
+            if not continue_through_error:
+                print_to_file("Stopping at first consistent failure")
+                break
            sc_command = f"--scs={stepcurrent_key}"
        else:
            sc_command = f"--sc={stepcurrent_key}"
+        print_to_file("Retrying...")
+        print_items = []  # do not continue printing them, massive waste of space

    consistent_failures = [x[1:-1] for x in num_failures.keys() if num_failures[x] >= 3]
    flaky_failures = [x[1:-1] for x in num_failures.keys() if 0 < num_failures[x] < 3]
    if len(flaky_failures) > 0:
-        print(
-            "The following tests failed flakily (had to be rerun in a new process,"
-            + f" doesn't include reruns froms same process): {flaky_failures}",
-            file=output,
-            flush=True,
+        print_to_file(
+            "The following tests failed and then succeeded when run in a new process"
+            + f"{flaky_failures}",
        )
    if len(consistent_failures) > 0:
-        print(
-            f"The following tests failed consistently: {consistent_failures}",
-            file=output,
-            flush=True,
-        )
+        print_to_file(f"The following tests failed consistently: {consistent_failures}")
        return 1, True
    return 0, any(x > 0 for x in num_failures.values())

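To make the control flow of run_test_retries easier to follow, here is a simplified, hedged sketch of the loop; runner and read_last_failed are stand-ins (assumptions of this sketch) for the shell call and for reading the test recorded by the stepcurrent plugin.

    # Simplified sketch of the retry loop above, not the actual implementation.
    from collections import defaultdict
    from typing import Callable, List, Tuple

    def retry_loop(
        runner: Callable[[List[str]], int],    # runs pytest with extra args, returns exit code
        read_last_failed: Callable[[], str],   # returns the most recently run (failed) test
        stepcurrent_key: str,
        continue_through_error: bool,
    ) -> Tuple[int, bool]:
        num_failures = defaultdict(int)
        sc_flag = f"--sc={stepcurrent_key}"    # resume after the most recently run test
        while True:
            if runner([sc_flag]) == 0:
                break                          # reached the end of the suite
            failed = read_last_failed()
            num_failures[failed] += 1
            if num_failures[failed] >= 3:      # consistent failure: 3 tries in this job
                if not continue_through_error:
                    break                      # fail fast
                sc_flag = f"--scs={stepcurrent_key}"  # skip the consistently failing test
            else:
                sc_flag = f"--sc={stepcurrent_key}"   # rerun it
        if any(n >= 3 for n in num_failures.values()):
            return 1, True                     # overall failure; reruns happened
        return 0, any(n > 0 for n in num_failures.values())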
@@ -1040,9 +1053,7 @@ def handle_log_file(
    os.remove(file_path)


-def get_pytest_args(
-    options, stepcurrent_key, is_cpp_test=False, is_distributed_test=False
-):
+def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
    if RERUN_DISABLED_TESTS:
        # Distributed tests are too slow, so running them x50 will cause the jobs to timeout after
        # 3+ hours. So, let's opt for less number of reruns. We need at least 150 instances of the
@@ -1063,9 +1074,8 @@ def get_pytest_args(
    ]
    if not is_cpp_test:
        # C++ tests need to be run with pytest directly, not via python
+        # We have a custom pytest shard that conflicts with the normal plugin
        pytest_args.extend(["-p", "no:xdist", "--use-pytest"])
-        if not options.continue_through_error and IS_CI:
-            pytest_args.append(f"--sc={stepcurrent_key}")
    else:
        # Use pytext-dist to run C++ tests in parallel as running them sequentially using run_test
        # is much slower than running them directly
@@ -1638,7 +1648,6 @@ def run_tests(
    def check_pip_packages() -> None:
        packages = [
            "pytest-rerunfailures",
-            "pytest-shard",
            "pytest-flakefinder",
            "pytest-xdist",
        ]