Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[td] Consistent pytest cache (#113804)
Move the pytest cache download into the build step and store the result in the additional CI files folder so that it stays consistent during sharding. Only the build environment is taken into account now, instead of also the test config, since we might not have the test config at build time. This makes the cache key less specific, but that may actually be better, since tests are likely to fail across the same test configs (it might even be worth dropping the build environment from the key, but that's a different topic).

Each cache upload should only include information from the current run: the current cache is no longer merged with the downloaded cache during upload (this shouldn't matter anyway, since the downloaded cache won't exist at upload time).

From what I can tell of the S3 retention policy, pytest cache files will be deleted after 30 days (cc @ZainRizvi to confirm), so we never have to worry about space or about pulling old versions.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/113804
Approved by: https://github.com/ZainRizvi
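For orientation, here is a minimal sketch of the S3 key layout that the reworked _get_s3_key_prefix helper (see .github/scripts/pytest_caching_utils.py below) produces once sha and test_config become part of the key. The "pytest_cache" top-level prefix, the sanitization rules, and the example values are assumptions for illustration only:

    # Hypothetical illustration of the new key layout; the real helper lives in
    # .github/scripts/pytest_caching_utils.py. The top-level prefix constant and
    # the sanitization of job_identifier/test_config are simplified here.
    def example_key_prefix(
        owner: str, name: str, pr: str, job: str,
        sha: str = "", test_config: str = "", shard: str = "",
    ) -> str:
        prefix = f"pytest_cache/{owner}/{name}/{pr}/{job}"
        # sha, test_config and shard are appended only when provided, so the same
        # helper can also build the shorter per-job prefix used for downloads.
        for part in (sha, test_config, shard):
            if part:
                prefix += f"/{part}"
        return prefix

    # e.g. pytest_cache/pytorch/pytorch/<pr>/<job>/<sha>/<test_config>/<shard>
    # (a .zip extension is appended when the cache directory is zipped for upload)
    print(example_key_prefix("pytorch", "pytorch", "refs_pull_113804", "pull_linux-build", "abc1234", "default", "1"))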
Commit: dab272eed8
Parent: 033d7b670a
Committed by: PyTorch MergeBot
.github/actions/pytest-cache-upload/action.yml (vendored, 11 changes)

@@ -10,6 +10,13 @@ inputs:
     description: Shard number for the current job
     required: false
     default: "0"
+  sha:
+    description: SHA for the commit
+    required: true
+  test_config:
+    description: Name of the test config
+    required: false
+    default: "default"
   job_identifier:
     description: Text that uniquely identifies a given job type within a workflow. All shards of a job should share the same job identifier.
     required: true
@@ -33,6 +40,8 @@ runs:
     env:
       CACHE_DIR: ${{ inputs.cache_dir }}
       JOB_IDENTIFIER: ${{ inputs.job_identifier }}
+      SHA: ${{ inputs.sha }}
+      TEST_CONFIG: ${{ inputs.test_config }}
       SHARD: ${{ inputs.shard }}
       REPO: ${{ github.repository }}
     run: |
@@ -41,6 +50,8 @@ runs:
        --cache_dir $GITHUB_WORKSPACE/$CACHE_DIR \
        --pr_identifier $GITHUB_REF \
        --job_identifier $JOB_IDENTIFIER \
+       --sha $SHA \
+       --test_config $TEST_CONFIG \
        --shard $SHARD \
        --repo $REPO \
        --temp_dir $RUNNER_TEMP \
.github/scripts/pytest_cache.py (vendored, 8 changes)

@@ -38,6 +38,12 @@ def parse_args() -> argparse.Namespace:
         required=True,
         help="A unique job identifier that should be the same for all runs of job",
     )
+    parser.add_argument(
+        "--sha", required="--upload" in sys.argv, help="SHA of the commit"
+    )  # Only required for upload
+    parser.add_argument(
+        "--test_config", required="--upload" in sys.argv, help="The test config"
+    )  # Only required for upload
     parser.add_argument(
         "--shard", required="--upload" in sys.argv, help="The shard id"
     )  # Only required for upload
@@ -84,6 +90,8 @@ def main() -> None:
         pr_identifier=pr_identifier,
         repo=repo,
         job_identifier=args.job_identifier,
+        sha=args.sha,
+        test_config=args.test_config,
         shard=args.shard,
         cache_dir=cache_dir,
         bucket=args.bucket,
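The required="--upload" in sys.argv idiom above makes a flag mandatory only when the script is invoked in upload mode. A standalone sketch of the same pattern (the flag names here are illustrative, not the script's full interface):

    # Minimal, self-contained sketch of the conditional-required argparse idiom
    # used in pytest_cache.py; run it with and without --upload to see the effect.
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("--upload", action="store_true")
    # --sha must be supplied only if --upload was also passed on the command line.
    parser.add_argument("--sha", required="--upload" in sys.argv, help="SHA of the commit")

    args = parser.parse_args()
    print(args)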
.github/scripts/pytest_caching_utils.py (vendored, 56 changes)

@@ -56,6 +56,8 @@ def upload_pytest_cache(
     pr_identifier: PRIdentifier,
     repo: GithubRepo,
     job_identifier: str,
+    sha: str,
+    test_config: str,
     shard: str,
     cache_dir: Path,
     temp_dir: Path,
@@ -79,25 +81,11 @@ def upload_pytest_cache(
     if not bucket:
         bucket = BUCKET

-    # Merge the current cache with any caches from previous runs before uploading
-    # We only need to merge it with the cache for the same shard (which will have already been downloaded if it exists)
-    # since the other shards will handle themselves
-    shard_cache_path = _get_temp_cache_dir_path(
-        temp_dir, pr_identifier, repo, job_identifier, shard
-    )
-
-    if shard_cache_path.is_dir():
-        _merge_pytest_caches(shard_cache_path, cache_dir)
-
     #
     # Upload the cache
     #

-    obj_key_prefix = _get_s3_key_prefix(pr_identifier, repo, job_identifier, shard)
-    # This doesn't include the zip file extension. That'll get added later
-    zip_file_path = temp_dir / ZIP_UPLOAD / obj_key_prefix
-
-    zip_file_path = zip_folder(cache_dir, zip_file_path)
+    obj_key_prefix = _get_s3_key_prefix(
+        pr_identifier, repo, job_identifier, sha, test_config, shard
+    )
+    zip_file_path = zip_folder(cache_dir, temp_dir / ZIP_UPLOAD / obj_key_prefix)
     obj_key = f"{obj_key_prefix}{os.path.splitext(zip_file_path)[1]}"  # Keep the new file extension
     upload_file_to_s3(zip_file_path, bucket, obj_key)

@@ -136,38 +124,22 @@ def download_pytest_cache(
     )

     for downloaded_zip in downloads:
-        # the file name of the zip is the shard id
-        shard = os.path.splitext(os.path.basename(downloaded_zip))[0]
-        cache_dir_for_shard = _get_temp_cache_dir_path(
-            temp_dir, pr_identifier, repo, job_identifier, shard
+        # Unzip into random folder, then merge with the current cache
+        cache_dir_for_shard = (
+            temp_dir / UNZIPPED_CACHES / os.urandom(16).hex() / PYTEST_CACHE_DIR_NAME
         )

         unzip_folder(downloaded_zip, cache_dir_for_shard)
-        print(
-            f"Merging cache for job_identifier `{job_identifier}`, shard `{shard}` into `{dest_cache_dir}`"
-        )
+        print(f"Merging cache from {downloaded_zip}")
         _merge_pytest_caches(cache_dir_for_shard, dest_cache_dir)


-def _get_temp_cache_dir_path(
-    temp_dir: Path,
-    pr_identifier: PRIdentifier,
-    repo: GithubRepo,
-    job_identifier: str,
-    shard: str,
-) -> Path:
-    return (
-        temp_dir
-        / UNZIPPED_CACHES
-        / _get_s3_key_prefix(pr_identifier, repo, job_identifier, shard)
-        / PYTEST_CACHE_DIR_NAME
-    )
-
-
 def _get_s3_key_prefix(
     pr_identifier: PRIdentifier,
     repo: GithubRepo,
     job_identifier: str,
+    sha: str = "",
+    test_config: str = "",
     shard: str = "",
 ) -> str:
     """
@@ -176,6 +148,10 @@ def _get_s3_key_prefix(
     """
     prefix = f"{PYTEST_CACHE_KEY_PREFIX}/{repo.owner}/{repo.name}/{pr_identifier}/{sanitize_for_s3(job_identifier)}"

+    if sha:
+        prefix += f"/{sha}"
+    if test_config:
+        prefix += f"/{sanitize_for_s3(test_config)}"
     if shard:
         prefix += f"/{shard}"

.github/workflows/_linux-build.yml (vendored, 7 changes)

@@ -118,6 +118,13 @@ jobs:
           test-matrix: ${{ inputs.test-matrix }}
           job-name: ${{ steps.get-job-id.outputs.job-name }}

+      - name: Download pytest cache
+        uses: ./.github/actions/pytest-cache-download
+        continue-on-error: true
+        with:
+          cache_dir: .pytest_cache
+          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
+
       - name: Build
         if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
         id: build
.github/workflows/_linux-test.yml (vendored, 11 changes)

@@ -130,13 +130,6 @@ jobs:
           test-matrix: ${{ inputs.test-matrix }}
           job-name: ${{ steps.get-job-id.outputs.job-name }}

-      - name: Download pytest cache
-        uses: ./.github/actions/pytest-cache-download
-        continue-on-error: true
-        with:
-          cache_dir: .pytest_cache
-          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}_${{ github.job }}_${{ matrix.config }}
-
       - name: Set Test step time
         id: test-timeout
         shell: bash
@@ -256,7 +249,9 @@
         with:
           cache_dir: .pytest_cache
           shard: ${{ matrix.shard }}
-          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}_${{ github.job }}_${{ matrix.config }}
+          sha: ${{ github.event.pull_request.head.sha || github.sha }}
+          test_config: ${{ matrix.config }}
+          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}

       - name: Print remaining test logs
         shell: bash
.github/workflows/_win-build.yml (vendored, 7 changes)

@@ -107,6 +107,13 @@ jobs:
           test-matrix: ${{ inputs.test-matrix }}
           job-name: ${{ steps.get-job-id.outputs.job-name }}

+      - name: Download pytest cache
+        uses: ./.github/actions/pytest-cache-download
+        continue-on-error: true
+        with:
+          cache_dir: .pytest_cache
+          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
+
       - name: Build
         if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
         id: build
.github/workflows/_win-test.yml (vendored, 11 changes)

@@ -132,13 +132,6 @@ jobs:
           test-matrix: ${{ inputs.test-matrix }}
           job-name: ${{ steps.get-job-id.outputs.job-name }}

-      - name: Download pytest cache
-        uses: ./.github/actions/pytest-cache-download
-        continue-on-error: true
-        with:
-          cache_dir: .pytest_cache
-          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}_${{ github.job }}_${{ matrix.config }}
-
       - name: Test
         id: test
         shell: bash
@@ -190,7 +183,9 @@
         with:
           cache_dir: .pytest_cache
           shard: ${{ matrix.shard }}
-          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}_${{ github.job }}_${{ matrix.config }}
+          sha: ${{ github.event.pull_request.head.sha || github.sha }}
+          test_config: ${{ matrix.config }}
+          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}

       - name: Print remaining test logs
         shell: bash
(additional script; file name not captured in this page snapshot)

@@ -4,6 +4,7 @@ import sys
 REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
 sys.path.append(str(REPO_ROOT))

 from tools.stats.import_test_stats import (
+    copy_pytest_cache,
     get_td_heuristic_historial_edited_files_json,
     get_td_heuristic_profiling_json,
     get_test_class_ratings,
@@ -21,6 +22,7 @@ def main() -> None:
     get_test_class_ratings()
     get_td_heuristic_historial_edited_files_json()
     get_td_heuristic_profiling_json()
+    copy_pytest_cache()


 if __name__ == "__main__":
tools/stats/import_test_stats.py

@@ -4,6 +4,7 @@ import datetime
 import json
 import os
 import pathlib
+import shutil
 from typing import Any, Callable, cast, Dict, List, Optional, Union
 from urllib.request import urlopen

@@ -26,6 +27,7 @@ TEST_FILE_RATINGS_FILE = "test-file-ratings.json"
 TEST_CLASS_RATINGS_FILE = "test-class-ratings.json"
 TD_HEURISTIC_PROFILING_FILE = "td_heuristic_profiling.json"
 TD_HEURISTIC_HISTORICAL_EDITED_FILES = "td_heuristic_historical_edited_files.json"
+TD_HEURISTIC_PREVIOUSLY_FAILED = "previous_failures.json"

 FILE_CACHE_LIFESPAN_SECONDS = datetime.timedelta(hours=3).seconds

@@ -153,6 +155,16 @@ def get_td_heuristic_profiling_json() -> Dict[str, Any]:
     )


+def copy_pytest_cache() -> None:
+    original_path = REPO_ROOT / ".pytest_cache/v/cache/lastfailed"
+    if not original_path.exists():
+        return
+    shutil.copyfile(
+        original_path,
+        REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_PREVIOUSLY_FAILED,
+    )
+
+
 def get_from_test_infra_generated_stats(
     from_file: str, to_file: str, failure_explanation: str
 ) -> Dict[str, Any]:
(heuristics test file; name not captured in this page snapshot)

@@ -6,10 +6,10 @@ import unittest
 from typing import Any, Dict, Set
 from unittest import mock

-REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
+REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent.parent
 try:
     # using tools/ to optimize test run.
-    sys.path.append(str(REPO_ROOT))
+    sys.path.insert(0, str(REPO_ROOT))

     from tools.test.heuristics.heuristics_test_mixin import HeuristicsTestMixin
     from tools.testing.target_determination.determinator import (
@@ -19,10 +19,11 @@ try:
     )
     from tools.testing.target_determination.heuristics import HEURISTICS
     from tools.testing.target_determination.heuristics.previously_failed_in_pr import (
-        _get_previously_failing_tests,
+        get_previous_failures,
     )
     from tools.testing.test_run import TestRun, TestRuns

+    sys.path.remove(str(REPO_ROOT))
 except ModuleNotFoundError:
     print("Can't import required modules, exiting")
     sys.exit(1)
@@ -36,20 +37,20 @@ def mocked_file(contents: Dict[Any, Any]) -> io.IOBase:


 class TestParsePrevTests(HeuristicsTestMixin):
-    @mock.patch("pathlib.Path.exists", return_value=False)
+    @mock.patch("os.path.exists", return_value=False)
     def test_cache_does_not_exist(self, mock_exists: Any) -> None:
         expected_failing_test_files: Set[str] = set()

-        found_tests = _get_previously_failing_tests()
+        found_tests = get_previous_failures()

         self.assertSetEqual(expected_failing_test_files, found_tests)

-    @mock.patch("pathlib.Path.exists", return_value=True)
+    @mock.patch("os.path.exists", return_value=True)
     @mock.patch("builtins.open", return_value=mocked_file({"": True}))
     def test_empty_cache(self, mock_exists: Any, mock_open: Any) -> None:
         expected_failing_test_files: Set[str] = set()

-        found_tests = _get_previously_failing_tests()
+        found_tests = get_previous_failures()

         self.assertSetEqual(expected_failing_test_files, found_tests)
         mock_open.assert_called()
@@ -61,19 +62,19 @@ class TestParsePrevTests(HeuristicsTestMixin):
         "test/test_bar.py::TestBar::test_fun_copy[25]": True,
     }

-    @mock.patch("pathlib.Path.exists", return_value=True)
+    @mock.patch("os.path.exists", return_value=True)
     @mock.patch(
         "builtins.open",
         return_value=mocked_file(lastfailed_with_multiple_tests_per_file),
     )
     def test_dedupes_failing_test_files(self, mock_exists: Any, mock_open: Any) -> None:
         expected_failing_test_files = {"test_car", "test_bar", "test_far"}
-        found_tests = _get_previously_failing_tests()
+        found_tests = get_previous_failures()

         self.assertSetEqual(expected_failing_test_files, found_tests)

     @mock.patch(
-        "tools.testing.target_determination.heuristics.previously_failed_in_pr._get_previously_failing_tests",
+        "tools.testing.target_determination.heuristics.previously_failed_in_pr.get_previous_failures",
         return_value={"test4"},
     )
     @mock.patch(
tools/testing/target_determination/heuristics/previously_failed_in_pr.py

@@ -1,7 +1,12 @@
 import json
+import os
 from pathlib import Path
 from typing import Any, Dict, List, Set
-from warnings import warn
+
+from tools.stats.import_test_stats import (
+    ADDITIONAL_CI_FILES_FOLDER,
+    TD_HEURISTIC_PREVIOUSLY_FAILED,
+)

 from tools.testing.target_determination.heuristics.interface import (
     HeuristicInterface,
@@ -11,6 +16,8 @@ from tools.testing.target_determination.heuristics.utils import (
     python_test_file_to_test_name,
 )

+REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent.parent
+

 class PreviouslyFailedInPR(HeuristicInterface):
     def __init__(self, **kwargs: Dict[str, Any]):
@@ -19,7 +26,7 @@ class PreviouslyFailedInPR(HeuristicInterface):
     def get_test_priorities(self, tests: List[str]) -> TestPrioritizations:
         # Tests must always be returned in a deterministic order.
         # Otherwise it breaks our test sharding logic
-        critical_tests = sorted(_get_previously_failing_tests())
+        critical_tests = sorted(get_previous_failures())
         test_rankings = TestPrioritizations(
             tests_being_ranked=tests, high_relevance=critical_tests
         )
@@ -27,25 +34,19 @@ class PreviouslyFailedInPR(HeuristicInterface):
         return test_rankings

     def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
-        critical_tests = _get_previously_failing_tests()
+        critical_tests = get_previous_failures()
         return {test: 1 for test in critical_tests if test in tests}


-def _get_previously_failing_tests() -> Set[str]:
-    PYTEST_FAILED_TESTS_CACHE_FILE_PATH = Path(".pytest_cache/v/cache/lastfailed")
-
-    if not PYTEST_FAILED_TESTS_CACHE_FILE_PATH.exists():
-        warn(
-            f"No pytorch cache found at {PYTEST_FAILED_TESTS_CACHE_FILE_PATH.absolute()}"
-        )
+def get_previous_failures() -> Set[str]:
+    path = REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_PREVIOUSLY_FAILED
+    if not os.path.exists(path):
+        print(f"could not find path {path}")
         return set()

-    with open(PYTEST_FAILED_TESTS_CACHE_FILE_PATH) as f:
-        last_failed_tests = json.load(f)
-
-    prioritized_tests = _parse_prev_failing_test_files(last_failed_tests)
-
-    return python_test_file_to_test_name(prioritized_tests)
+    with open(path) as f:
+        return python_test_file_to_test_name(
+            _parse_prev_failing_test_files(json.load(f))
+        )


 def _parse_prev_failing_test_files(last_failed_tests: Dict[str, bool]) -> Set[str]: