[td] Consistent pytest cache (#113804)

Move the pytest cache download into the build step and store the result in the additional CI files folder so that it stays consistent across shards.
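A minimal sketch of the resulting flow, assuming the helper and constant names from the diff below (`copy_pytest_cache`, `get_previous_failures`, `ADDITIONAL_CI_FILES_FOLDER`, `TD_HEURISTIC_PREVIOUSLY_FAILED`): the build job downloads the merged cache once and snapshots the `lastfailed` file, and every test shard later reads that same snapshot.

```python
# Sketch only: simplified versions of the helpers added in
# tools/stats/import_test_stats.py and previously_failed_in_pr.py.
# REPO_ROOT handling and the folder name here are assumptions.
import json
import pathlib
import shutil
from typing import Set

REPO_ROOT = pathlib.Path(__file__).resolve().parent
ADDITIONAL_CI_FILES_FOLDER = pathlib.Path("additional_ci_files")  # assumed value
TD_HEURISTIC_PREVIOUSLY_FAILED = "previous_failures.json"


def copy_pytest_cache() -> None:
    # Build time, right after the pytest cache download: snapshot the
    # lastfailed file so it ships alongside the other additional CI files.
    original = REPO_ROOT / ".pytest_cache/v/cache/lastfailed"
    if not original.exists():
        return
    (REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER).mkdir(parents=True, exist_ok=True)
    shutil.copyfile(
        original,
        REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_PREVIOUSLY_FAILED,
    )


def get_previous_failures() -> Set[str]:
    # Test time, on every shard: read the snapshot instead of a freshly
    # downloaded cache, so all shards rank tests from identical data.
    # (Simplified: the real helper maps failed test ids to test file names.)
    path = REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_PREVIOUSLY_FAILED
    if not path.exists():
        return set()
    with open(path) as f:
        return set(json.load(f).keys())
```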

Only the build environment is taken into account now instead of also the test config, since we might not have the test config at build time. This makes the key less specific, but I also think this might be better, since tests are likely to fail the same way across test configs. (I also think it might be worth not even looking at the build environment, but that's a different topic.)
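As a rough illustration of the key layout, the sketch below mirrors `_get_s3_key_prefix` from the diff; the `pytest_cache` prefix value and the example values in the comments are assumptions, and sanitization of the identifiers is omitted.

```python
# Downloads (build step) key only on workflow + build environment, so one
# prefix covers every sha/test config/shard uploaded for the PR; uploads
# (test step) append sha, test config, and shard to keep objects distinct.
def get_s3_key_prefix(
    owner: str,
    name: str,
    pr_identifier: str,
    job_identifier: str,
    sha: str = "",
    test_config: str = "",
    shard: str = "",
) -> str:
    prefix = f"pytest_cache/{owner}/{name}/{pr_identifier}/{job_identifier}"
    for part in (sha, test_config, shard):
        if part:
            prefix += f"/{part}"
    return prefix


# Download prefix (broad), with hypothetical values:
#   pytest_cache/pytorch/pytorch/<pr>/pull_linux-focal-py3_8-gcc7
# Upload key (specific), before the ".zip" extension is appended:
#   pytest_cache/pytorch/pytorch/<pr>/pull_linux-focal-py3_8-gcc7/abc1234/default/1
```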

Each cache upload should only include information from the current run. Do not merge the current cache with the downloaded cache during upload (this shouldn't matter anyway, since the downloaded cache won't exist at that point).
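A hedged sketch of the simplified upload path: `shutil.make_archive` and `boto3` stand in here for the repo's own `zip_folder` and `upload_file_to_s3` helpers, which are what the diff actually uses.

```python
import os
import shutil
from pathlib import Path

import boto3  # stand-in for the repo's S3 upload helper


def upload_current_run_cache(
    cache_dir: Path, temp_dir: Path, bucket: str, obj_key_prefix: str
) -> None:
    # Zip only what this run wrote into cache_dir -- no merge with any
    # previously downloaded cache. The sha/test_config/shard suffix in
    # obj_key_prefix keeps the upload from overwriting other runs' objects.
    zip_base = temp_dir / "zip-upload" / obj_key_prefix
    zip_base.parent.mkdir(parents=True, exist_ok=True)
    zip_file_path = shutil.make_archive(str(zip_base), "zip", root_dir=str(cache_dir))
    obj_key = f"{obj_key_prefix}{os.path.splitext(zip_file_path)[1]}"  # keeps ".zip"
    boto3.client("s3").upload_file(zip_file_path, bucket, obj_key)
```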

From what I can tell of the S3 retention policy, pytest cache files will be deleted after 30 days (cc @ZainRizvi to confirm), so we never have to worry about space or about pulling old versions.
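For reference, that expiration would live in an S3 lifecycle rule rather than in this PR; a 30-day expiration on the cache prefix would look roughly like the following (bucket name and rule are illustrative, not taken from the actual infra).

```python
import boto3

# Illustrative only: expire pytest cache objects after 30 days.
# The real bucket and its policy are managed outside this PR.
boto3.client("s3").put_bucket_lifecycle_configuration(
    Bucket="example-ci-cache-bucket",  # hypothetical bucket name
    LifecycleConfiguration={
        "Rules": [
            {
                "ID": "expire-pytest-cache",
                "Filter": {"Prefix": "pytest_cache/"},
                "Status": "Enabled",
                "Expiration": {"Days": 30},
            }
        ]
    },
)
```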

Pull Request resolved: https://github.com/pytorch/pytorch/pull/113804
Approved by: https://github.com/ZainRizvi
Catherine Lee
2023-11-17 23:45:43 +00:00
committed by PyTorch MergeBot
parent 033d7b670a
commit dab272eed8
11 changed files with 98 additions and 83 deletions

View File

@ -10,6 +10,13 @@ inputs:
description: Shard number for the current job
required: false
default: "0"
sha:
description: SHA for the commit
required: true
test_config:
description: Name of the test config
required: false
default: "default"
job_identifier:
description: Text that uniquely identifies a given job type within a workflow. All shards of a job should share the same job identifier.
required: true
@ -33,6 +40,8 @@ runs:
env:
CACHE_DIR: ${{ inputs.cache_dir }}
JOB_IDENTIFIER: ${{ inputs.job_identifier }}
SHA: ${{ inputs.sha }}
TEST_CONFIG: ${{ inputs.test_config }}
SHARD: ${{ inputs.shard }}
REPO: ${{ github.repository }}
run: |
@ -41,6 +50,8 @@ runs:
--cache_dir $GITHUB_WORKSPACE/$CACHE_DIR \
--pr_identifier $GITHUB_REF \
--job_identifier $JOB_IDENTIFIER \
--sha $SHA \
--test_config $TEST_CONFIG \
--shard $SHARD \
--repo $REPO \
--temp_dir $RUNNER_TEMP \

View File

@ -38,6 +38,12 @@ def parse_args() -> argparse.Namespace:
required=True,
help="A unique job identifier that should be the same for all runs of job",
)
parser.add_argument(
"--sha", required="--upload" in sys.argv, help="SHA of the commit"
) # Only required for upload
parser.add_argument(
"--test_config", required="--upload" in sys.argv, help="The test config"
) # Only required for upload
parser.add_argument(
"--shard", required="--upload" in sys.argv, help="The shard id"
) # Only required for upload
@ -84,6 +90,8 @@ def main() -> None:
pr_identifier=pr_identifier,
repo=repo,
job_identifier=args.job_identifier,
sha=args.sha,
test_config=args.test_config,
shard=args.shard,
cache_dir=cache_dir,
bucket=args.bucket,

View File

@ -56,6 +56,8 @@ def upload_pytest_cache(
pr_identifier: PRIdentifier,
repo: GithubRepo,
job_identifier: str,
sha: str,
test_config: str,
shard: str,
cache_dir: Path,
temp_dir: Path,
@ -79,25 +81,11 @@ def upload_pytest_cache(
if not bucket:
bucket = BUCKET
# Merge the current cache with any caches from previous runs before uploading
# We only need to merge it with the cache for the same shard (which will have already been downloaded if it exists)
# since the other shards will handle themselves
shard_cache_path = _get_temp_cache_dir_path(
temp_dir, pr_identifier, repo, job_identifier, shard
)
if shard_cache_path.is_dir():
_merge_pytest_caches(shard_cache_path, cache_dir)
#
# Upload the cache
#
obj_key_prefix = _get_s3_key_prefix(pr_identifier, repo, job_identifier, shard)
# This doesn't include the zip file extension. That'll get added later
zip_file_path = temp_dir / ZIP_UPLOAD / obj_key_prefix
zip_file_path = zip_folder(cache_dir, zip_file_path)
obj_key_prefix = _get_s3_key_prefix(
pr_identifier, repo, job_identifier, sha, test_config, shard
)
zip_file_path = zip_folder(cache_dir, temp_dir / ZIP_UPLOAD / obj_key_prefix)
obj_key = f"{obj_key_prefix}{os.path.splitext(zip_file_path)[1]}" # Keep the new file extension
upload_file_to_s3(zip_file_path, bucket, obj_key)
@ -136,38 +124,22 @@ def download_pytest_cache(
)
for downloaded_zip in downloads:
# the file name of the zip is the shard id
shard = os.path.splitext(os.path.basename(downloaded_zip))[0]
cache_dir_for_shard = _get_temp_cache_dir_path(
temp_dir, pr_identifier, repo, job_identifier, shard
# Unzip into random folder, then merge with the current cache
cache_dir_for_shard = (
temp_dir / UNZIPPED_CACHES / os.urandom(16).hex() / PYTEST_CACHE_DIR_NAME
)
unzip_folder(downloaded_zip, cache_dir_for_shard)
print(
f"Merging cache for job_identifier `{job_identifier}`, shard `{shard}` into `{dest_cache_dir}`"
)
print(f"Merging cache from {downloaded_zip}")
_merge_pytest_caches(cache_dir_for_shard, dest_cache_dir)
def _get_temp_cache_dir_path(
temp_dir: Path,
pr_identifier: PRIdentifier,
repo: GithubRepo,
job_identifier: str,
shard: str,
) -> Path:
return (
temp_dir
/ UNZIPPED_CACHES
/ _get_s3_key_prefix(pr_identifier, repo, job_identifier, shard)
/ PYTEST_CACHE_DIR_NAME
)
def _get_s3_key_prefix(
pr_identifier: PRIdentifier,
repo: GithubRepo,
job_identifier: str,
sha: str = "",
test_config: str = "",
shard: str = "",
) -> str:
"""
@ -176,6 +148,10 @@ def _get_s3_key_prefix(
"""
prefix = f"{PYTEST_CACHE_KEY_PREFIX}/{repo.owner}/{repo.name}/{pr_identifier}/{sanitize_for_s3(job_identifier)}"
if sha:
prefix += f"/{sha}"
if test_config:
prefix += f"/{sanitize_for_s3(test_config)}"
if shard:
prefix += f"/{shard}"

View File

@ -118,6 +118,13 @@ jobs:
test-matrix: ${{ inputs.test-matrix }}
job-name: ${{ steps.get-job-id.outputs.job-name }}
- name: Download pytest cache
uses: ./.github/actions/pytest-cache-download
continue-on-error: true
with:
cache_dir: .pytest_cache
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
- name: Build
if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
id: build

View File

@ -130,13 +130,6 @@ jobs:
test-matrix: ${{ inputs.test-matrix }}
job-name: ${{ steps.get-job-id.outputs.job-name }}
- name: Download pytest cache
uses: ./.github/actions/pytest-cache-download
continue-on-error: true
with:
cache_dir: .pytest_cache
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}_${{ github.job }}_${{ matrix.config }}
- name: Set Test step time
id: test-timeout
shell: bash
@ -256,7 +249,9 @@ jobs:
with:
cache_dir: .pytest_cache
shard: ${{ matrix.shard }}
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}_${{ github.job }}_${{ matrix.config }}
sha: ${{ github.event.pull_request.head.sha || github.sha }}
test_config: ${{ matrix.config }}
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
- name: Print remaining test logs
shell: bash

View File

@ -107,6 +107,13 @@ jobs:
test-matrix: ${{ inputs.test-matrix }}
job-name: ${{ steps.get-job-id.outputs.job-name }}
- name: Download pytest cache
uses: ./.github/actions/pytest-cache-download
continue-on-error: true
with:
cache_dir: .pytest_cache
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
- name: Build
if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
id: build

View File

@ -132,13 +132,6 @@ jobs:
test-matrix: ${{ inputs.test-matrix }}
job-name: ${{ steps.get-job-id.outputs.job-name }}
- name: Download pytest cache
uses: ./.github/actions/pytest-cache-download
continue-on-error: true
with:
cache_dir: .pytest_cache
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}_${{ github.job }}_${{ matrix.config }}
- name: Test
id: test
shell: bash
@ -190,7 +183,9 @@ jobs:
with:
cache_dir: .pytest_cache
shard: ${{ matrix.shard }}
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}_${{ github.job }}_${{ matrix.config }}
sha: ${{ github.event.pull_request.head.sha || github.sha }}
test_config: ${{ matrix.config }}
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
- name: Print remaining test logs
shell: bash

View File

@ -4,6 +4,7 @@ import sys
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
sys.path.append(str(REPO_ROOT))
from tools.stats.import_test_stats import (
copy_pytest_cache,
get_td_heuristic_historial_edited_files_json,
get_td_heuristic_profiling_json,
get_test_class_ratings,
@ -21,6 +22,7 @@ def main() -> None:
get_test_class_ratings()
get_td_heuristic_historial_edited_files_json()
get_td_heuristic_profiling_json()
copy_pytest_cache()
if __name__ == "__main__":

View File

@ -4,6 +4,7 @@ import datetime
import json
import os
import pathlib
import shutil
from typing import Any, Callable, cast, Dict, List, Optional, Union
from urllib.request import urlopen
@ -26,6 +27,7 @@ TEST_FILE_RATINGS_FILE = "test-file-ratings.json"
TEST_CLASS_RATINGS_FILE = "test-class-ratings.json"
TD_HEURISTIC_PROFILING_FILE = "td_heuristic_profiling.json"
TD_HEURISTIC_HISTORICAL_EDITED_FILES = "td_heuristic_historical_edited_files.json"
TD_HEURISTIC_PREVIOUSLY_FAILED = "previous_failures.json"
FILE_CACHE_LIFESPAN_SECONDS = datetime.timedelta(hours=3).seconds
@ -153,6 +155,16 @@ def get_td_heuristic_profiling_json() -> Dict[str, Any]:
)
def copy_pytest_cache() -> None:
original_path = REPO_ROOT / ".pytest_cache/v/cache/lastfailed"
if not original_path.exists():
return
shutil.copyfile(
original_path,
REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_PREVIOUSLY_FAILED,
)
def get_from_test_infra_generated_stats(
from_file: str, to_file: str, failure_explanation: str
) -> Dict[str, Any]:

View File

@ -6,10 +6,10 @@ import unittest
from typing import Any, Dict, Set
from unittest import mock
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent.parent
try:
# using tools/ to optimize test run.
sys.path.append(str(REPO_ROOT))
sys.path.insert(0, str(REPO_ROOT))
from tools.test.heuristics.heuristics_test_mixin import HeuristicsTestMixin
from tools.testing.target_determination.determinator import (
@ -19,10 +19,11 @@ try:
)
from tools.testing.target_determination.heuristics import HEURISTICS
from tools.testing.target_determination.heuristics.previously_failed_in_pr import (
_get_previously_failing_tests,
get_previous_failures,
)
from tools.testing.test_run import TestRun, TestRuns
sys.path.remove(str(REPO_ROOT))
except ModuleNotFoundError:
print("Can't import required modules, exiting")
sys.exit(1)
@ -36,20 +37,20 @@ def mocked_file(contents: Dict[Any, Any]) -> io.IOBase:
class TestParsePrevTests(HeuristicsTestMixin):
@mock.patch("pathlib.Path.exists", return_value=False)
@mock.patch("os.path.exists", return_value=False)
def test_cache_does_not_exist(self, mock_exists: Any) -> None:
expected_failing_test_files: Set[str] = set()
found_tests = _get_previously_failing_tests()
found_tests = get_previous_failures()
self.assertSetEqual(expected_failing_test_files, found_tests)
@mock.patch("pathlib.Path.exists", return_value=True)
@mock.patch("os.path.exists", return_value=True)
@mock.patch("builtins.open", return_value=mocked_file({"": True}))
def test_empty_cache(self, mock_exists: Any, mock_open: Any) -> None:
expected_failing_test_files: Set[str] = set()
found_tests = _get_previously_failing_tests()
found_tests = get_previous_failures()
self.assertSetEqual(expected_failing_test_files, found_tests)
mock_open.assert_called()
@ -61,19 +62,19 @@ class TestParsePrevTests(HeuristicsTestMixin):
"test/test_bar.py::TestBar::test_fun_copy[25]": True,
}
@mock.patch("pathlib.Path.exists", return_value=True)
@mock.patch("os.path.exists", return_value=True)
@mock.patch(
"builtins.open",
return_value=mocked_file(lastfailed_with_multiple_tests_per_file),
)
def test_dedupes_failing_test_files(self, mock_exists: Any, mock_open: Any) -> None:
expected_failing_test_files = {"test_car", "test_bar", "test_far"}
found_tests = _get_previously_failing_tests()
found_tests = get_previous_failures()
self.assertSetEqual(expected_failing_test_files, found_tests)
@mock.patch(
"tools.testing.target_determination.heuristics.previously_failed_in_pr._get_previously_failing_tests",
"tools.testing.target_determination.heuristics.previously_failed_in_pr.get_previous_failures",
return_value={"test4"},
)
@mock.patch(

View File

@ -1,7 +1,12 @@
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Set
from warnings import warn
from tools.stats.import_test_stats import (
ADDITIONAL_CI_FILES_FOLDER,
TD_HEURISTIC_PREVIOUSLY_FAILED,
)
from tools.testing.target_determination.heuristics.interface import (
HeuristicInterface,
@ -11,6 +16,8 @@ from tools.testing.target_determination.heuristics.utils import (
python_test_file_to_test_name,
)
REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent.parent
class PreviouslyFailedInPR(HeuristicInterface):
def __init__(self, **kwargs: Dict[str, Any]):
@ -19,7 +26,7 @@ class PreviouslyFailedInPR(HeuristicInterface):
def get_test_priorities(self, tests: List[str]) -> TestPrioritizations:
# Tests must always be returned in a deterministic order.
# Otherwise it breaks our test sharding logic
critical_tests = sorted(_get_previously_failing_tests())
critical_tests = sorted(get_previous_failures())
test_rankings = TestPrioritizations(
tests_being_ranked=tests, high_relevance=critical_tests
)
@ -27,25 +34,19 @@ class PreviouslyFailedInPR(HeuristicInterface):
return test_rankings
def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
critical_tests = _get_previously_failing_tests()
critical_tests = get_previous_failures()
return {test: 1 for test in critical_tests if test in tests}
def _get_previously_failing_tests() -> Set[str]:
PYTEST_FAILED_TESTS_CACHE_FILE_PATH = Path(".pytest_cache/v/cache/lastfailed")
if not PYTEST_FAILED_TESTS_CACHE_FILE_PATH.exists():
warn(
f"No pytorch cache found at {PYTEST_FAILED_TESTS_CACHE_FILE_PATH.absolute()}"
)
def get_previous_failures() -> Set[str]:
path = REPO_ROOT / ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_PREVIOUSLY_FAILED
if not os.path.exists(path):
print(f"could not find path {path}")
return set()
with open(PYTEST_FAILED_TESTS_CACHE_FILE_PATH) as f:
last_failed_tests = json.load(f)
prioritized_tests = _parse_prev_failing_test_files(last_failed_tests)
return python_test_file_to_test_name(prioritized_tests)
with open(path) as f:
return python_test_file_to_test_name(
_parse_prev_failing_test_files(json.load(f))
)
def _parse_prev_failing_test_files(last_failed_tests: Dict[str, bool]) -> Set[str]: