Compare commits

...

13 Commits

SHA1 Message Date
9b1953d29e undo failure 2025-11-10 14:58:31 -08:00
874ba0a6b4 better message 2025-11-07 09:50:14 -08:00
1868ca8b6a try to extract class name 2025-11-07 09:43:06 -08:00
2228067938 typo again 2025-11-07 08:25:02 -08:00
56ea1aec79 cache val 2025-11-06 20:43:45 -08:00
c8324d30f6 typo again 2025-11-06 18:27:21 -08:00
26568b469d fix path 2025-11-06 15:44:06 -08:00
3e7188c5e7 lint 2025-11-06 14:54:20 -08:00
6a3694adb0 more general 2025-11-06 14:51:58 -08:00
24c259cd6c tc 2025-11-06 14:13:31 -08:00
9cc873e582 lint 2025-11-06 14:02:27 -08:00
cb574c7adb lint, int, fail 2025-11-06 13:48:52 -08:00
44cfeec317 tc 2025-11-06 13:44:53 -08:00
3 changed files with 81 additions and 11 deletions

View File

@@ -308,12 +308,16 @@ class StepcurrentPlugin:
         self.report_status = ""
         assert config.cache is not None
         self.cache: pytest.Cache = config.cache
-        self.directory = f"{STEPCURRENT_CACHE_DIR}/{config.getoption('stepcurrent')}"
-        self.lastrun: Optional[str] = self.cache.get(self.directory, None)
+        directory = f"{STEPCURRENT_CACHE_DIR}/{config.getoption('stepcurrent')}"
+        self.lastrun_location = f"{directory}/lastrun"
+        self.lastrun: Optional[str] = self.cache.get(self.lastrun_location, None)
         self.initial_val = self.lastrun
         self.skip: bool = config.getoption("stepcurrent_skip")
         self.run_single: bool = config.getoption("run_single")
+
+        self.made_failing_xml_location = f"{directory}/made_failing_xml"
+        self.cache.set(self.made_failing_xml_location, False)
 
     def pytest_collection_modifyitems(self, config: Config, items: list[Any]) -> None:
         if not self.lastrun:
             self.report_status = "Cannot find last run test, not skipping"
@@ -349,8 +353,10 @@ class StepcurrentPlugin:
 
     def pytest_runtest_protocol(self, item, nextitem) -> None:
         self.lastrun = item.nodeid
-        self.cache.set(self.directory, self.lastrun)
+        self.cache.set(self.lastrun_location, self.lastrun)
 
     def pytest_sessionfinish(self, session, exitstatus):
         if exitstatus == 0:
-            self.cache.set(self.directory, self.initial_val)
+            self.cache.set(self.lastrun_location, self.initial_val)
+        if exitstatus != 0:
+            self.cache.set(self.made_failing_xml_location, True)
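
For context on the cache keys introduced above: pytest's built-in cache JSON-encodes each value into a file named after its key under .pytest_cache/v/, so the plugin's ".../lastrun" and ".../made_failing_xml" keys become plain files that run_test.py (next file) reads back as raw text. A minimal sketch of that mapping, assuming STEPCURRENT_CACHE_DIR is a key prefix such as "cache/stepcurrent" (consistent with the path hard-coded in run_test.py below, but not shown in this diff):

    import json
    from pathlib import Path

    STEPCURRENT_CACHE_DIR = "cache/stepcurrent"  # assumed value; defined elsewhere in the plugin

    def cache_file_for(rootdir: Path, key: str) -> Path:
        # pytest.Cache.set(key, value) JSON-encodes value into <rootdir>/.pytest_cache/v/<key>
        return rootdir / ".pytest_cache" / "v" / key

    # A hypothetical stepcurrent key, as passed via --sc=<stepcurrent_key>
    path = cache_file_for(Path("."), f"{STEPCURRENT_CACHE_DIR}/my-sc-key/lastrun")
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(None))    # what cache.set(lastrun_location, None) would store
    assert path.read_text() == "null"    # hence the string comparison against "null" in run_test.py
    assert json.dumps(False) == "false"  # and made_failing_xml reads back as the text "false"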

View File

@@ -78,6 +78,7 @@ from tools.testing.test_selections import (
 try:
     from tools.testing.upload_artifacts import (
         parse_xml_and_upload_json,
+        upload_adhoc_failure_json,
         zip_and_upload_artifacts,
     )
 except ImportError:
@@ -87,7 +88,10 @@ except ImportError:
 
     def parse_xml_and_upload_json():
         pass
 
-    def zip_and_upload_artifacts(failed: bool):
+    def zip_and_upload_artifacts(*args, **kwargs):
         pass
+
+    def upload_adhoc_failure_json(*args, **kwargs):
+        pass
 
@@ -642,6 +646,7 @@ def run_test(
             output,
             options.continue_through_error,
             test_file,
+            options,
         )
     else:
         command.extend([f"--sc={stepcurrent_key}", "--print-items"])
@@ -728,6 +733,7 @@ def run_test_retries(
     output,
     continue_through_error,
     test_file,
+    options,
 ):
     # Run the test with -x to stop at first failure. Rerun the test by itself.
     # If it succeeds, move on to the rest of the tests in a new process. If it
@@ -746,6 +752,16 @@
 
     num_failures = defaultdict(int)
 
+    def read_pytest_cache(key: str) -> Any:
+        cache_file = (
+            REPO_ROOT / ".pytest_cache/v/cache/stepcurrent" / stepcurrent_key / key
+        )
+        try:
+            with open(cache_file) as f:
+                return f.read()
+        except FileNotFoundError:
+            return None
+
     print_items = ["--print-items"]
     sc_command = f"--sc={stepcurrent_key}"
     while True:
@@ -766,12 +782,11 @@
 
         # Read what just failed/ran
         try:
-            with open(
-                REPO_ROOT / ".pytest_cache/v/cache/stepcurrent" / stepcurrent_key
-            ) as f:
-                current_failure = f.read()
-            if current_failure == "null":
-                current_failure = f"'{test_file}'"
+            current_failure = read_pytest_cache("lastrun")
+            if current_failure is None:
+                raise FileNotFoundError
+            if current_failure == "null":
+                current_failure = f"'{test_file}'"
         except FileNotFoundError:
             print_to_file(
                 "No stepcurrent file found. Either pytest didn't get to run (e.g. import error)"
@@ -794,6 +809,13 @@
             # This is for log classifier so it can prioritize consistently
             # failing tests instead of reruns. [1:-1] to remove quotes
             print_to_file(f"FAILED CONSISTENTLY: {current_failure[1:-1]}")
+            if (
+                read_pytest_cache("made_failing_xml") == "false"
+                and IS_CI
+                and options.upload_artifacts_while_running
+            ):
+                upload_adhoc_failure_json(test_file, current_failure[1:-1])
+
             if not continue_through_error:
                 print_to_file("Stopping at first consistent failure")
                 break
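
A short worked example of the "[1:-1] to remove quotes" comment in the hunk above: the plugin stores the nodeid through pytest's cache, so the raw file text returned by read_pytest_cache() keeps the JSON double quotes, while the "null" fallback wraps the test file in single quotes, so slicing off the first and last character handles both cases. The nodeid and file name here are hypothetical:

    import json

    # Raw text of the lastrun cache file for a stored nodeid.
    raw = json.dumps("test/test_ops.py::TestCommon::test_out")
    assert raw == '"test/test_ops.py::TestCommon::test_out"'
    assert raw[1:-1] == "test/test_ops.py::TestCommon::test_out"

    # The current_failure == "null" branch wraps the file name in single quotes instead.
    test_file = "test_ops"
    fallback = f"'{test_file}'"
    assert fallback[1:-1] == "test_ops"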

View File

@@ -208,3 +208,45 @@ def parse_xml_and_upload_json() -> None:
             lock.release()
     except Exception as e:
         print(f"Failed to parse and upload json test reports: {e}")
+
+
+def upload_adhoc_failure_json(invoking_file: str, current_failure: str) -> None:
+    """
+    Manually upload a json to s3 indicating that the entire test file failed,
+    since xml was probably not generated in this case.
+    """
+    try:
+        job_id = int(os.environ["JOB_ID"])
+        workflow_id = int(os.environ["GITHUB_RUN_ID"])
+    except Exception as e:
+        print(f"Failed to get job_id or workflow_id: {e}")
+        return
+
+    split_failure = current_failure.split("::")
+    if len(split_failure) >= 2:
+        className = split_failure[-2]
+        testName = split_failure[-1]
+    else:
+        testName = current_failure
+        className = ""
+    message = "The test file failed but pytest did not generate xml. The most likely cause is a segfault"
+    j = {
+        "invoking_file": invoking_file,
+        "file": f"{invoking_file}.py",
+        "name": testName,
+        "classname": className,
+        "workflow_id": workflow_id,
+        "workflow_run_attempt": os.environ.get("GITHUB_RUN_ATTEMPT"),
+        "job_id": job_id,
+        "failure": {"message": message, "text": message},
+    }
+
+    gzipped = gzip.compress(json.dumps(j).encode("utf-8"))
+    s3_key = f"{invoking_file.replace('/', '_')}_{os.urandom(8).hex()}.json"
+    get_s3_resource().put_object(
+        Body=gzipped,
+        Bucket="gha-artifacts",
+        Key=f"test_jsons_while_running/{workflow_id}/{job_id}/{s3_key}",
+        ContentType="application/json",
+        ContentEncoding="gzip",
+    )
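
To illustrate the classname/name extraction in upload_adhoc_failure_json (the "try to extract class name" commit): a consistent failure that is a full pytest nodeid splits on "::" into a class and a test name, while anything else falls back to an empty classname. A small sketch with hypothetical values, using a standalone helper name purely for illustration:

    def split_failure_for_report(current_failure: str) -> tuple[str, str]:
        # Same splitting rule as upload_adhoc_failure_json, pulled out for illustration.
        parts = current_failure.split("::")
        if len(parts) >= 2:
            return parts[-2], parts[-1]  # classname, test name
        return "", current_failure

    assert split_failure_for_report("test/test_ops.py::TestCommon::test_out") == ("TestCommon", "test_out")
    assert split_failure_for_report("test_ops") == ("", "test_ops")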