[ci] Save various json files from test infra into folder (#111516)

We pull a lot of files from https://github.com/pytorch/test-infra/blob/generated-stats/stats and name each one separately when adding them to the build artifacts, so put them in a single folder and add that folder instead.

The slow-test and disabled-test JSON files remain as they were: they are pulled during the test step and are not used for sharding, so they do not need to be included in the build artifacts.

Sanity-checked that test times could be found for Linux, macOS, Windows, and ROCm.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/111516
Approved by: https://github.com/huydhn, https://github.com/ZainRizvi
2025-10-20 21:14:14 +08:00 · 2023-10-23 20:38:25 +00:00
parent e509b162ed
commit 3b5b7ebd09
12 changed files with 60 additions and 49 deletions
--- a/.ci/pytorch/win-test-helpers/build_pytorch.bat
+++ b/.ci/pytorch/win-test-helpers/build_pytorch.bat
@ -127,9 +127,7 @@ python -c "import os, glob; os.system('python -mpip install --no-index --no-deps

    :: export test times so that potential sharded tests that'll branch off this build will use consistent data
    python tools/stats/export_test_times.py
-    copy /Y ".pytorch-test-times.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
-    copy /Y ".pytorch-test-file-ratings.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
-    copy /Y ".pytorch-test-class-ratings.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
+    robocopy /E ".additional_ci_files" "%PYTORCH_FINAL_PACKAGE_DIR%\.additional_ci_files"

    :: Also save build/.ninja_log as an artifact
    copy /Y "build\.ninja_log" "%PYTORCH_FINAL_PACKAGE_DIR%\"
--- a/.ci/pytorch/win-test-helpers/test_python_jit_legacy.bat
+++ b/.ci/pytorch/win-test-helpers/test_python_jit_legacy.bat
@ -1,9 +1,7 @@
 call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat

 echo Copying over test times file
-copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%PROJECT_DIR_WIN%"
-copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-file-ratings.json" "%PROJECT_DIR_WIN%"
-copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-class-ratings.json" "%PROJECT_DIR_WIN%"
+robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"

 pushd test

--- a/.ci/pytorch/win-test-helpers/test_python_shard.bat
+++ b/.ci/pytorch/win-test-helpers/test_python_shard.bat
@ -22,9 +22,7 @@ if "%SHARD_NUMBER%" == "1" (
 )

 echo Copying over test times file
-copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%PROJECT_DIR_WIN%"
-copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-file-ratings.json" "%PROJECT_DIR_WIN%"
-copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-class-ratings.json" "%PROJECT_DIR_WIN%"
+robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"

 echo Run nn tests
 python run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -623,7 +623,7 @@ jobs:
            - run:
                name: Archive artifacts into zip
                command: |
-                  zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
+                  zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
                  cp artifacts.zip /Users/distiller/workspace

      - persist_to_workspace:
--- a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
@ -177,7 +177,7 @@
            - run:
                name: Archive artifacts into zip
                command: |
-                  zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
+                  zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
                  cp artifacts.zip /Users/distiller/workspace

      - persist_to_workspace:
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@ -170,7 +170,7 @@ jobs:
      - name: Archive artifacts into zip
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
        run: |
-          zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
+          zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .additional_ci_files

      - name: Store PyTorch Build Artifacts on S3
        uses: seemethere/upload-artifact-s3@v5
--- a/.github/workflows/_mac-build.yml
+++ b/.github/workflows/_mac-build.yml
@ -182,7 +182,7 @@ jobs:
      - name: Archive artifacts into zip
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
        run: |
-          zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
+          zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files

      - name: Store PyTorch Build Artifacts on GHA
        uses: actions/upload-artifact@v3
--- a/.gitignore
+++ b/.gitignore
@ -14,13 +14,11 @@ coverage.xml
 .gradle
 .hypothesis
 .mypy_cache
+.additional_ci_files
 /.extracted_scripts/
 **/.pytorch_specified_test_cases.csv
 **/.pytorch-disabled-tests.json
 **/.pytorch-slow-tests.json
-**/.pytorch-test-times.json
-**/.pytorch-test-file-ratings.json
-**/.pytorch-test-class-ratings.json
 */*.pyc
 */*.so*
 */**/__pycache__
--- a/test/run_test.py
+++ b/test/run_test.py
@ -40,7 +40,7 @@ REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent

 # using tools/ to optimize test run.
 sys.path.insert(0, str(REPO_ROOT))
-from tools.stats.export_test_times import TEST_TIMES_FILE
+from tools.stats.import_test_stats import ADDITIONAL_CI_FILES_FOLDER, TEST_TIMES_FILE
 from tools.stats.upload_metrics import add_global_metric, emit_metric
 from tools.testing.target_determination.determinator import (
    AggregatedHeuristics,
@ -1424,7 +1424,9 @@ def get_selected_tests(options) -> List[str]:
    return selected_tests


-def download_test_times(file: str = TEST_TIMES_FILE) -> Dict[str, float]:
+def download_test_times(
+    file: str = ADDITIONAL_CI_FILES_FOLDER / TEST_TIMES_FILE,
+) -> Dict[str, float]:
    # Download previous test times to make sharding decisions
    path = os.path.join(str(REPO_ROOT), file)
    if not os.path.exists(path):
@ -1695,7 +1697,7 @@ def main():
            )
            return s.strip()

-    test_times_dict = download_test_times(TEST_TIMES_FILE)
+    test_times_dict = download_test_times(ADDITIONAL_CI_FILES_FOLDER / TEST_TIMES_FILE)
    test_batches: List[TestBatch] = []

    # Each batch will be run sequentially
--- a/tools/stats/export_test_times.py
+++ b/tools/stats/export_test_times.py
@ -9,16 +9,12 @@ from tools.stats.import_test_stats import (
    get_test_times,
 )

-TEST_TIMES_FILE = ".pytorch-test-times.json"
-TEST_FILE_RATINGS_FILE = ".pytorch-test-file-ratings.json"
-TEST_CLASS_RATINGS_FILE = ".pytorch-test-class-ratings.json"
-

 def main() -> None:
-    print(f"Exporting test times from test-infra to {TEST_TIMES_FILE}")
-    get_test_times(str(REPO_ROOT), filename=TEST_TIMES_FILE)
-    get_test_file_ratings(str(REPO_ROOT), filename=TEST_FILE_RATINGS_FILE)
-    get_test_class_ratings(str(REPO_ROOT), filename=TEST_CLASS_RATINGS_FILE)
+    print("Exporting files from test-infra")
+    get_test_times()
+    get_test_file_ratings()
+    get_test_class_ratings()


 if __name__ == "__main__":
--- a/tools/stats/import_test_stats.py
+++ b/tools/stats/import_test_stats.py
@ -4,9 +4,11 @@ import datetime
 import json
 import os
 import pathlib
-from typing import Any, Callable, cast, Dict, List, Optional
+from typing import Any, Callable, cast, Dict, List, Optional, Union
 from urllib.request import urlopen

+REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
+

 def get_disabled_issues() -> List[str]:
    reenabled_issues = os.getenv("REENABLED_ISSUES", "")
@ -17,13 +19,17 @@ def get_disabled_issues() -> List[str]:

 SLOW_TESTS_FILE = ".pytorch-slow-tests.json"
 DISABLED_TESTS_FILE = ".pytorch-disabled-tests.json"
+ADDITIONAL_CI_FILES_FOLDER = pathlib.Path(".additional_ci_files")
+TEST_TIMES_FILE = "test-times.json"
+TEST_FILE_RATINGS_FILE = "test-file-ratings.json"
+TEST_CLASS_RATINGS_FILE = "test-class-ratings.json"


 FILE_CACHE_LIFESPAN_SECONDS = datetime.timedelta(hours=3).seconds


 def fetch_and_cache(
-    dirpath: str,
+    dirpath: Union[str, pathlib.Path],
    name: str,
    url: str,
    process_fn: Callable[[Dict[str, Any]], Dict[str, Any]],
@ -31,6 +37,8 @@ def fetch_and_cache(
    """
    This fetch and cache utils allows sharing between different process.
    """
+    pathlib.Path(dirpath).mkdir(exist_ok=True)
+
    path = os.path.join(dirpath, name)
    print(f"Downloading {url} to {path}")

@ -72,13 +80,12 @@ def get_slow_tests(
        return {}


-def get_test_times(dirpath: str, filename: str) -> Dict[str, Dict[str, float]]:
-    url = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/test-times.json"
-    try:
-        return fetch_and_cache(dirpath, filename, url, lambda x: x)
-    except Exception:
-        print("Couldn't download test times...")
-        return {}
+def get_test_times() -> Dict[str, Dict[str, float]]:
+    return get_from_test_infra_generated_stats(
+        "test-times.json",
+        TEST_TIMES_FILE,
+        "Couldn't download test times...",
+    )


 def get_disabled_tests(
@ -104,19 +111,30 @@ def get_disabled_tests(
        return {}


-def get_test_file_ratings(dirpath: str, filename: str) -> Optional[Dict[str, Any]]:
-    url = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/file_test_rating.json"
-    try:
-        return fetch_and_cache(dirpath, filename, url, lambda x: x)
-    except Exception:
-        print("Couldn't download test file ratings file, not reordering...")
-        return {}
+def get_test_file_ratings() -> Dict[str, Any]:
+    return get_from_test_infra_generated_stats(
+        "file_test_rating.json",
+        TEST_FILE_RATINGS_FILE,
+        "Couldn't download test file ratings file, not reordering...",
+    )


-def get_test_class_ratings(dirpath: str, filename: str) -> Optional[Dict[str, Any]]:
-    url = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/file_test_class_rating.json"
+def get_test_class_ratings() -> Dict[str, Any]:
+    return get_from_test_infra_generated_stats(
+        "file_test_class_rating.json",
+        TEST_CLASS_RATINGS_FILE,
+        "Couldn't download test class ratings file, not reordering...",
+    )
+
+
+def get_from_test_infra_generated_stats(
+    from_file: str, to_file: str, failure_explanation: str
+) -> Dict[str, Any]:
+    url = f"https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/{from_file}"
    try:
-        return fetch_and_cache(dirpath, filename, url, lambda x: x)
+        return fetch_and_cache(
+            REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER, to_file, url, lambda x: x
+        )
    except Exception:
-        print("Couldn't download test class ratings file, not reordering...")
+        print(failure_explanation)
        return {}
--- a/tools/testing/target_determination/heuristics/correlated_with_historical_failures.py
+++ b/tools/testing/target_determination/heuristics/correlated_with_historical_failures.py
@ -4,7 +4,10 @@ from collections import defaultdict
 from typing import Any, cast, Dict, List
 from warnings import warn

-from tools.stats.export_test_times import TEST_FILE_RATINGS_FILE
+from tools.stats.import_test_stats import (
+    ADDITIONAL_CI_FILES_FOLDER,
+    TEST_FILE_RATINGS_FILE,
+)

 from tools.testing.target_determination.heuristics.interface import (
    HeuristicInterface,
@ -32,7 +35,7 @@ class CorrelatedWithHistoricalFailures(HeuristicInterface):


 def _get_file_rating_tests() -> List[str]:
-    path = REPO_ROOT / TEST_FILE_RATINGS_FILE
+    path = REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TEST_FILE_RATINGS_FILE
    if not os.path.exists(path):
        print(f"could not find path {path}")
        return []