Files
pytorch/tools/stats/import_test_stats.py
Catherine Lee dab272eed8 [td] Consistent pytest cache (#113804)
Move the pytest cache downloading into the build step and store it in additional ci files so that it stays consistent during sharding.

Only build env is taken into account now instead of also test config since we might not have the test config during build time, making it less specific, but I also think this might be better since tests are likely to fail across the same test config (I also think it might be worth not even looking at build env but that's a different topic)

Each cache upload should only include information from the current run.  Do not merge current cache with downloaded cache during upload (shouldn't matter anyways since the downloaded cache won't exist at the time)

From what I can tell of the S3 retention policy, pytest cache files will be deleted after 30 days (cc @ZainRizvi to confirm), so we never have to worry about space or pulling old versions.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/113804
Approved by: https://github.com/ZainRizvi
2023-11-17 23:45:47 +00:00

179 lines
6.0 KiB
Python

#!/usr/bin/env python3
import datetime
import json
import os
import pathlib
import shutil
from typing import Any, Callable, cast, Dict, List, Optional, Union
from urllib.request import urlopen
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
def get_disabled_issues() -> List[str]:
    """Return the issue numbers listed in the REENABLED_ISSUES env var.

    An unset or empty variable yields an empty list; a bare
    ``"".split(",")`` would otherwise produce ``[""]``, leaving a spurious
    empty-string entry in the result.
    """
    reenabled_issues = os.getenv("REENABLED_ISSUES", "")
    issue_numbers = [issue for issue in reenabled_issues.split(",") if issue]
    print("Ignoring disabled issues: ", issue_numbers)
    return issue_numbers
# Cache file names for the slow-test and disabled-test sets (the directory is
# supplied by the caller of get_slow_tests / get_disabled_tests).
SLOW_TESTS_FILE = ".pytorch-slow-tests.json"
DISABLED_TESTS_FILE = ".pytorch-disabled-tests.json"
# Folder (relative to REPO_ROOT) where the test-infra generated-stats files
# below are cached by get_from_test_infra_generated_stats.
ADDITIONAL_CI_FILES_FOLDER = pathlib.Path(".additional_ci_files")
# Local file names for the individual generated-stats downloads.
TEST_TIMES_FILE = "test-times.json"
TEST_CLASS_TIMES_FILE = "test-class-times.json"
TEST_FILE_RATINGS_FILE = "test-file-ratings.json"
TEST_CLASS_RATINGS_FILE = "test-class-ratings.json"
TD_HEURISTIC_PROFILING_FILE = "td_heuristic_profiling.json"
TD_HEURISTIC_HISTORICAL_EDITED_FILES = "td_heuristic_historical_edited_files.json"
# Destination name used by copy_pytest_cache for pytest's "lastfailed" data.
TD_HEURISTIC_PREVIOUSLY_FAILED = "previous_failures.json"
# How long a cached download stays valid before fetch_and_cache re-downloads it.
FILE_CACHE_LIFESPAN_SECONDS = datetime.timedelta(hours=3).seconds
def fetch_and_cache(
    dirpath: Union[str, pathlib.Path],
    name: str,
    url: str,
    process_fn: Callable[[Dict[str, Any]], Dict[str, Any]],
) -> Dict[str, Any]:
    """Download JSON from ``url``, run ``process_fn`` on it, and cache the
    result at ``dirpath/name`` so that concurrent test processes can share it.

    Returns the processed dict, or an empty dict if all three download
    attempts fail and no fresh cached copy exists.
    """
    # parents=True: a missing intermediate directory previously made mkdir
    # raise FileNotFoundError instead of creating the cache folder.
    pathlib.Path(dirpath).mkdir(parents=True, exist_ok=True)
    path = os.path.join(dirpath, name)
    print(f"Downloading {url} to {path}")

    def is_cached_file_valid() -> bool:
        # Check if the file is new enough (see: FILE_CACHE_LIFESPAN_SECONDS). A real check
        # could make a HEAD request and check/store the file's ETag
        mtime = datetime.datetime.fromtimestamp(pathlib.Path(path).stat().st_mtime)
        age = datetime.datetime.now() - mtime
        return age.total_seconds() < FILE_CACHE_LIFESPAN_SECONDS

    if os.path.exists(path) and is_cached_file_valid():
        # Another test process already downloaded the file, so don't re-do it
        with open(path) as f:
            return cast(Dict[str, Any], json.load(f))

    for _ in range(3):
        try:
            contents = urlopen(url, timeout=5).read().decode("utf-8")
            processed_contents = process_fn(json.loads(contents))
            with open(path, "w") as f:
                f.write(json.dumps(processed_contents))
            return processed_contents
        except Exception as e:
            print(f"Could not download {url} because: {e}.")
    print(f"All retries exhausted, downloading {url} failed.")
    return {}
def get_slow_tests(
    dirpath: str, filename: str = SLOW_TESTS_FILE
) -> Optional[Dict[str, float]]:
    """Download the slow-tests set from ossci-metrics, cached at
    ``dirpath/filename``; returns an empty dict when the download fails."""
    slow_tests_url = "https://ossci-metrics.s3.amazonaws.com/slow-tests.json"
    try:
        return fetch_and_cache(dirpath, filename, slow_tests_url, lambda x: x)
    except Exception:
        print("Couldn't download slow test set, leaving all tests enabled...")
        return {}
def get_test_times() -> Dict[str, Dict[str, float]]:
    """Fetch the generated per-test timing stats from test-infra."""
    failure_msg = "Couldn't download test times..."
    return get_from_test_infra_generated_stats(
        "test-times.json", TEST_TIMES_FILE, failure_msg
    )
def get_test_class_times() -> Dict[str, Dict[str, float]]:
    """Fetch the generated per-test-class timing stats from test-infra."""
    return get_from_test_infra_generated_stats(
        "test-class-times.json",
        TEST_CLASS_TIMES_FILE,
        # Fixed copy-paste: the old message said "test times", making the two
        # failure modes indistinguishable in the logs.
        "Couldn't download test class times...",
    )
def get_disabled_tests(
    dirpath: str, filename: str = DISABLED_TESTS_FILE
) -> Optional[Dict[str, Any]]:
    """Download the condensed disabled-tests set, cached at
    ``dirpath/filename``, filtering out issues that have been re-enabled.

    Returns an empty dict when the download fails.
    """

    def process_disabled_test(the_response: Dict[str, Any]) -> Dict[str, Any]:
        # remove re-enabled tests and condense even further by getting rid of pr_num
        reenabled = get_disabled_issues()
        return {
            test_name: (link, platforms)
            for test_name, (pr_num, link, platforms) in the_response.items()
            if pr_num not in reenabled
        }

    try:
        url = "https://ossci-metrics.s3.amazonaws.com/disabled-tests-condensed.json"
        return fetch_and_cache(dirpath, filename, url, process_disabled_test)
    except Exception:
        print("Couldn't download test skip set, leaving all tests enabled...")
        return {}
def get_test_file_ratings() -> Dict[str, Any]:
    """Fetch the generated per-file test ratings from test-infra."""
    failure_msg = "Couldn't download test file ratings file, not reordering..."
    return get_from_test_infra_generated_stats(
        "file_test_rating.json", TEST_FILE_RATINGS_FILE, failure_msg
    )
def get_test_class_ratings() -> Dict[str, Any]:
    """Fetch the generated per-class test ratings from test-infra."""
    failure_msg = "Couldn't download test class ratings file, not reordering..."
    return get_from_test_infra_generated_stats(
        "file_test_class_rating.json", TEST_CLASS_RATINGS_FILE, failure_msg
    )
# NOTE: "historial" in the name is a long-standing typo; kept because callers
# import this function by name.
def get_td_heuristic_historial_edited_files_json() -> Dict[str, Any]:
    """Fetch the historical-edited-files TD heuristic data from test-infra."""
    failure_msg = (
        "Couldn't download td_heuristic_historical_edited_files.json, not reordering..."
    )
    return get_from_test_infra_generated_stats(
        "td_heuristic_historical_edited_files.json",
        TD_HEURISTIC_HISTORICAL_EDITED_FILES,
        failure_msg,
    )
def get_td_heuristic_profiling_json() -> Dict[str, Any]:
    """Fetch the profiling-based TD heuristic data from test-infra."""
    return get_from_test_infra_generated_stats(
        "td_heuristic_profiling.json",
        TD_HEURISTIC_PROFILING_FILE,
        # Fixed missing comma so the message matches its sibling heuristics'
        # "..., not reordering..." phrasing.
        "Couldn't download td_heuristic_profiling.json, not reordering...",
    )
def copy_pytest_cache() -> None:
    """Copy pytest's "lastfailed" cache into the additional CI files folder.

    No-op when the pytest cache does not exist (e.g. pytest has not run).
    """
    original_path = REPO_ROOT / ".pytest_cache/v/cache/lastfailed"
    if not original_path.exists():
        return
    dest = REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_PREVIOUSLY_FAILED
    # The destination folder may not exist yet (nothing else guarantees it was
    # created on this path); copyfile would raise FileNotFoundError otherwise.
    dest.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(original_path, dest)
def get_from_test_infra_generated_stats(
    from_file: str, to_file: str, failure_explanation: str
) -> Dict[str, Any]:
    """Fetch ``from_file`` from test-infra's generated-stats branch and cache
    it under ``REPO_ROOT/ADDITIONAL_CI_FILES_FOLDER`` as ``to_file``.

    On any error, prints ``failure_explanation`` and returns an empty dict.
    """
    base = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats"
    stats_url = f"{base}/{from_file}"
    try:
        cache_dir = REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER
        return fetch_and_cache(cache_dir, to_file, stats_url, lambda x: x)
    except Exception:
        print(failure_explanation)
        return {}