[ci] Save various json files from test infra into folder (#111516)

We pull a lot of files from https://github.com/pytorch/test-infra/blob/generated-stats/stats and currently add each one to the build artifacts under its own name. Instead, save them all into a single folder and add just that folder to the artifacts.

The slow test and disabled test JSON files remain as they were: they are pulled during the test step and are not used for sharding, so they do not need to be included in the artifacts during build.

Sanity checked that test times could be found for Linux, macOS, Windows, and ROCm.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/111516
Approved by: https://github.com/huydhn, https://github.com/ZainRizvi
This commit is contained in:
Catherine Lee
2023-10-23 20:38:25 +00:00
committed by PyTorch MergeBot
parent e509b162ed
commit 3b5b7ebd09
12 changed files with 60 additions and 49 deletions

View File

@ -127,9 +127,7 @@ python -c "import os, glob; os.system('python -mpip install --no-index --no-deps
:: export test times so that potential sharded tests that'll branch off this build will use consistent data
python tools/stats/export_test_times.py
copy /Y ".pytorch-test-times.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
copy /Y ".pytorch-test-file-ratings.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
copy /Y ".pytorch-test-class-ratings.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
robocopy /E ".additional_ci_files" "%PYTORCH_FINAL_PACKAGE_DIR%\.additional_ci_files"
:: Also save build/.ninja_log as an artifact
copy /Y "build\.ninja_log" "%PYTORCH_FINAL_PACKAGE_DIR%\"

View File

@ -1,9 +1,7 @@
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
echo Copying over test times file
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%PROJECT_DIR_WIN%"
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-file-ratings.json" "%PROJECT_DIR_WIN%"
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-class-ratings.json" "%PROJECT_DIR_WIN%"
robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"
pushd test

View File

@ -22,9 +22,7 @@ if "%SHARD_NUMBER%" == "1" (
)
echo Copying over test times file
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%PROJECT_DIR_WIN%"
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-file-ratings.json" "%PROJECT_DIR_WIN%"
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-class-ratings.json" "%PROJECT_DIR_WIN%"
robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"
echo Run nn tests
python run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose

2
.circleci/config.yml generated
View File

@ -623,7 +623,7 @@ jobs:
- run:
name: Archive artifacts into zip
command: |
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
cp artifacts.zip /Users/distiller/workspace
- persist_to_workspace:

View File

@ -177,7 +177,7 @@
- run:
name: Archive artifacts into zip
command: |
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
cp artifacts.zip /Users/distiller/workspace
- persist_to_workspace:

View File

@ -170,7 +170,7 @@ jobs:
- name: Archive artifacts into zip
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .additional_ci_files
- name: Store PyTorch Build Artifacts on S3
uses: seemethere/upload-artifact-s3@v5

View File

@ -182,7 +182,7 @@ jobs:
- name: Archive artifacts into zip
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
run: |
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
- name: Store PyTorch Build Artifacts on GHA
uses: actions/upload-artifact@v3

4
.gitignore vendored
View File

@ -14,13 +14,11 @@ coverage.xml
.gradle
.hypothesis
.mypy_cache
.additional_ci_files
/.extracted_scripts/
**/.pytorch_specified_test_cases.csv
**/.pytorch-disabled-tests.json
**/.pytorch-slow-tests.json
**/.pytorch-test-times.json
**/.pytorch-test-file-ratings.json
**/.pytorch-test-class-ratings.json
*/*.pyc
*/*.so*
*/**/__pycache__

View File

@ -40,7 +40,7 @@ REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent
# using tools/ to optimize test run.
sys.path.insert(0, str(REPO_ROOT))
from tools.stats.export_test_times import TEST_TIMES_FILE
from tools.stats.import_test_stats import ADDITIONAL_CI_FILES_FOLDER, TEST_TIMES_FILE
from tools.stats.upload_metrics import add_global_metric, emit_metric
from tools.testing.target_determination.determinator import (
AggregatedHeuristics,
@ -1424,7 +1424,9 @@ def get_selected_tests(options) -> List[str]:
return selected_tests
def download_test_times(file: str = TEST_TIMES_FILE) -> Dict[str, float]:
def download_test_times(
file: str = ADDITIONAL_CI_FILES_FOLDER / TEST_TIMES_FILE,
) -> Dict[str, float]:
# Download previous test times to make sharding decisions
path = os.path.join(str(REPO_ROOT), file)
if not os.path.exists(path):
@ -1695,7 +1697,7 @@ def main():
)
return s.strip()
test_times_dict = download_test_times(TEST_TIMES_FILE)
test_times_dict = download_test_times(ADDITIONAL_CI_FILES_FOLDER / TEST_TIMES_FILE)
test_batches: List[TestBatch] = []
# Each batch will be run sequentially

View File

@ -9,16 +9,12 @@ from tools.stats.import_test_stats import (
get_test_times,
)
TEST_TIMES_FILE = ".pytorch-test-times.json"
TEST_FILE_RATINGS_FILE = ".pytorch-test-file-ratings.json"
TEST_CLASS_RATINGS_FILE = ".pytorch-test-class-ratings.json"
def main() -> None:
print(f"Exporting test times from test-infra to {TEST_TIMES_FILE}")
get_test_times(str(REPO_ROOT), filename=TEST_TIMES_FILE)
get_test_file_ratings(str(REPO_ROOT), filename=TEST_FILE_RATINGS_FILE)
get_test_class_ratings(str(REPO_ROOT), filename=TEST_CLASS_RATINGS_FILE)
print("Exporting files from test-infra")
get_test_times()
get_test_file_ratings()
get_test_class_ratings()
if __name__ == "__main__":

View File

@ -4,9 +4,11 @@ import datetime
import json
import os
import pathlib
from typing import Any, Callable, cast, Dict, List, Optional
from typing import Any, Callable, cast, Dict, List, Optional, Union
from urllib.request import urlopen
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
def get_disabled_issues() -> List[str]:
reenabled_issues = os.getenv("REENABLED_ISSUES", "")
@ -17,13 +19,17 @@ def get_disabled_issues() -> List[str]:
SLOW_TESTS_FILE = ".pytorch-slow-tests.json"
DISABLED_TESTS_FILE = ".pytorch-disabled-tests.json"
ADDITIONAL_CI_FILES_FOLDER = pathlib.Path(".additional_ci_files")
TEST_TIMES_FILE = "test-times.json"
TEST_FILE_RATINGS_FILE = "test-file-ratings.json"
TEST_CLASS_RATINGS_FILE = "test-class-ratings.json"
FILE_CACHE_LIFESPAN_SECONDS = datetime.timedelta(hours=3).seconds
def fetch_and_cache(
dirpath: str,
dirpath: Union[str, pathlib.Path],
name: str,
url: str,
process_fn: Callable[[Dict[str, Any]], Dict[str, Any]],
@ -31,6 +37,8 @@ def fetch_and_cache(
"""
This fetch and cache utils allows sharing between different process.
"""
pathlib.Path(dirpath).mkdir(exist_ok=True)
path = os.path.join(dirpath, name)
print(f"Downloading {url} to {path}")
@ -72,13 +80,12 @@ def get_slow_tests(
return {}
def get_test_times(dirpath: str, filename: str) -> Dict[str, Dict[str, float]]:
url = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/test-times.json"
try:
return fetch_and_cache(dirpath, filename, url, lambda x: x)
except Exception:
print("Couldn't download test times...")
return {}
def get_test_times() -> Dict[str, Dict[str, float]]:
return get_from_test_infra_generated_stats(
"test-times.json",
TEST_TIMES_FILE,
"Couldn't download test times...",
)
def get_disabled_tests(
@ -104,19 +111,30 @@ def get_disabled_tests(
return {}
def get_test_file_ratings(dirpath: str, filename: str) -> Optional[Dict[str, Any]]:
url = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/file_test_rating.json"
try:
return fetch_and_cache(dirpath, filename, url, lambda x: x)
except Exception:
print("Couldn't download test file ratings file, not reordering...")
return {}
def get_test_file_ratings() -> Dict[str, Any]:
return get_from_test_infra_generated_stats(
"file_test_rating.json",
TEST_FILE_RATINGS_FILE,
"Couldn't download test file ratings file, not reordering...",
)
def get_test_class_ratings(dirpath: str, filename: str) -> Optional[Dict[str, Any]]:
url = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/file_test_class_rating.json"
def get_test_class_ratings() -> Dict[str, Any]:
return get_from_test_infra_generated_stats(
"file_test_class_rating.json",
TEST_CLASS_RATINGS_FILE,
"Couldn't download test class ratings file, not reordering...",
)
def get_from_test_infra_generated_stats(
from_file: str, to_file: str, failure_explanation: str
) -> Dict[str, Any]:
url = f"https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/{from_file}"
try:
return fetch_and_cache(dirpath, filename, url, lambda x: x)
return fetch_and_cache(
REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER, to_file, url, lambda x: x
)
except Exception:
print("Couldn't download test class ratings file, not reordering...")
print(failure_explanation)
return {}

View File

@ -4,7 +4,10 @@ from collections import defaultdict
from typing import Any, cast, Dict, List
from warnings import warn
from tools.stats.export_test_times import TEST_FILE_RATINGS_FILE
from tools.stats.import_test_stats import (
ADDITIONAL_CI_FILES_FOLDER,
TEST_FILE_RATINGS_FILE,
)
from tools.testing.target_determination.heuristics.interface import (
HeuristicInterface,
@ -32,7 +35,7 @@ class CorrelatedWithHistoricalFailures(HeuristicInterface):
def _get_file_rating_tests() -> List[str]:
path = REPO_ROOT / TEST_FILE_RATINGS_FILE
path = REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TEST_FILE_RATINGS_FILE
if not os.path.exists(path):
print(f"could not find path {path}")
return []