From 63ec5cd15873fb686327e949977fb756d7aa5816 Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Thu, 7 Mar 2024 19:36:11 +0000 Subject: [PATCH] TD Heuristic for tests mentioned in PR body, less verbose TD printing (#120621) Move tests that are mentioned in PR body or commit message to front. Also attempts to find any issues/PRs mentioned in the PR body and search for those too (ex if you link a disable issue and that issue contains the test file that it was failing on) looking for: dynamo/test_export_mutations Also removes some printed information in TD Pull Request resolved: https://github.com/pytorch/pytorch/pull/120621 Approved by: https://github.com/osalpekar --- .github/workflows/target_determination.yml | 2 + .../target_determination/determinator.py | 3 +- .../heuristics/__init__.py | 4 +- .../heuristics/interface.py | 8 ++- .../heuristics/mentioned_in_pr.py | 66 +++++++++++++++++++ .../target_determination/heuristics/utils.py | 41 ++++++++++++ 6 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 tools/testing/target_determination/heuristics/mentioned_in_pr.py diff --git a/.github/workflows/target_determination.yml b/.github/workflows/target_determination.yml index 530272598324..4e2066024d04 100644 --- a/.github/workflows/target_determination.yml +++ b/.github/workflows/target_determination.yml @@ -39,6 +39,7 @@ jobs: id: td continue-on-error: true env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_WORKFLOW: ${{ github.workflow }} GITHUB_JOB: ${{ github.job }} @@ -47,6 +48,7 @@ jobs: GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }} JOB_ID: ${{ steps.get-job-id.outputs.job-id }} JOB_NAME: ${{ steps.get-job-id.outputs.job-name }} + PR_NUMBER: ${{ github.event.pull_request.number }} run: | python3 -m pip install boto3==1.19.12 python3 tools/testing/do_target_determination_for_s3.py diff --git a/tools/testing/target_determination/determinator.py b/tools/testing/target_determination/determinator.py index 35994758f380..17320a73a194 100644 --- a/tools/testing/target_determination/determinator.py +++ b/tools/testing/target_determination/determinator.py @@ -20,6 +20,7 @@ def get_test_prioritizations( new_rankings: TestPrioritizations = heuristic.get_prediction_confidence(tests) aggregated_results.add_heuristic_results(heuristic, new_rankings) - print(new_rankings.get_info_str(), file=file) + print(f"Results from {heuristic.__class__.__name__}") + print(new_rankings.get_info_str(verbose=False), file=file) return aggregated_results diff --git a/tools/testing/target_determination/heuristics/__init__.py b/tools/testing/target_determination/heuristics/__init__.py index 3452d935e427..9842613b5a74 100644 --- a/tools/testing/target_determination/heuristics/__init__.py +++ b/tools/testing/target_determination/heuristics/__init__.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Tuple from tools.testing.target_determination.heuristics.correlated_with_historical_failures import ( CorrelatedWithHistoricalFailures, @@ -16,6 +16,7 @@ from tools.testing.target_determination.heuristics.interface import ( HeuristicInterface as HeuristicInterface, TestPrioritizations as TestPrioritizations, ) +from tools.testing.target_determination.heuristics.mentioned_in_pr import MentionedInPR from tools.testing.target_determination.heuristics.previously_failed_in_pr import ( PreviouslyFailedInPR, @@ -27,6 +28,7 @@ from tools.testing.target_determination.heuristics.profiling import Profiling HEURISTICS: List[HeuristicInterface] = [ PreviouslyFailedInPR(), EditedByPR(), + MentionedInPR(), HistoricalClassFailurCorrelation(trial_mode=True), CorrelatedWithHistoricalFailures(), HistorialEditedFiles(), diff --git a/tools/testing/target_determination/heuristics/interface.py b/tools/testing/target_determination/heuristics/interface.py index a5dae08f74cb..6cdd90e8a7f4 100644 --- a/tools/testing/target_determination/heuristics/interface.py +++ b/tools/testing/target_determination/heuristics/interface.py @@ -118,13 +118,15 @@ class TestPrioritizations: tests = [x[1] for x in self._traverse_scores()] return tests[: n * len(tests) // 100], tests[n * len(tests) // 100 :] - def get_info_str(self) -> str: + def get_info_str(self, verbose: bool = True) -> str: info = "" for score, test in self._traverse_scores(): - info += f"{test} ({score})\n" + if not verbose and score == 0: + continue + info += f" {test} ({score})\n" - return info.strip() + return info.rstrip() def print_info(self) -> None: print(self.get_info_str()) diff --git a/tools/testing/target_determination/heuristics/mentioned_in_pr.py b/tools/testing/target_determination/heuristics/mentioned_in_pr.py new file mode 100644 index 000000000000..56c14b4a6a1a --- /dev/null +++ b/tools/testing/target_determination/heuristics/mentioned_in_pr.py @@ -0,0 +1,66 @@ +import os +import re +from typing import Any, List + +from tools.testing.target_determination.heuristics.interface import ( + HeuristicInterface, + TestPrioritizations, +) +from tools.testing.target_determination.heuristics.utils import ( + get_git_commit_info, + get_issue_or_pr_body, +) +from tools.testing.test_run import TestRun + + +# This heuristic searches the PR body and commit titles, as well as issues/PRs +# mentioned in the PR body/commit title for test names (search depth of 1) and +# gives the test a rating of 1. For example, if I mention "test_foo" in the PR +# body, test_foo will be rated 1. If I mention #123 in the PR body, and #123 +# mentions "test_foo", test_foo will be rated 1. +class MentionedInPR(HeuristicInterface): + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + + def _search_for_linked_issues(self, s: str) -> List[str]: + return re.findall(r"#(\d+)", s) + re.findall(r"/pytorch/pytorch/.*/(\d+)", s) + + def get_prediction_confidence(self, tests: List[str]) -> TestPrioritizations: + try: + commit_messages = get_git_commit_info() + except Exception as e: + print(f"Can't get commit info due to {e}") + commit_messages = "" + try: + pr_number = os.environ.get("PR_NUMBER", "") + if pr_number == "": + re_match = re.match( + r"^refs/tags/.*/(\d+)$", os.environ.get("GITHUB_REF", "") + ) + if re_match is not None: + pr_number = re_match.group(1) + pr_body = get_issue_or_pr_body(int(pr_number)) + except Exception as e: + print(f"Can't get PR body due to {e}") + pr_body = "" + + # Search for linked issues or PRs + linked_issue_bodies: List[str] = [] + for issue in self._search_for_linked_issues( + commit_messages + ) + self._search_for_linked_issues(pr_body): + try: + linked_issue_bodies.append(get_issue_or_pr_body(int(issue))) + except Exception as e: + pass + + mentioned = [] + for test in tests: + if ( + test in commit_messages + or test in pr_body + or any(test in body for body in linked_issue_bodies) + ): + mentioned.append(test) + + return TestPrioritizations(tests, {TestRun(test): 1 for test in mentioned}) diff --git a/tools/testing/target_determination/heuristics/utils.py b/tools/testing/target_determination/heuristics/utils.py index 84cad1630e85..a0823ab66e1e 100644 --- a/tools/testing/target_determination/heuristics/utils.py +++ b/tools/testing/target_determination/heuristics/utils.py @@ -4,6 +4,7 @@ import subprocess from collections import defaultdict from pathlib import Path from typing import cast, Dict, List, Set, Union +from urllib.request import Request, urlopen from warnings import warn from tools.testing.test_run import TestRun @@ -48,6 +49,46 @@ def query_changed_files() -> List[str]: return lines +def get_git_commit_info() -> str: + """Gets the commit info since the last commit on the default branch.""" + default_branch = f"origin/{os.environ.get('GIT_DEFAULT_BRANCH', 'main')}" + + merge_base = ( + subprocess.check_output(["git", "merge-base", default_branch, "HEAD"]) + .decode() + .strip() + ) + + head = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip() + + base_commit = merge_base + if base_commit == head: + # We are on the default branch, so check for changes since the last commit + base_commit = "HEAD^" + + return ( + subprocess.check_output( + ["git", "log", f"{base_commit}..HEAD"], + ) + .decode() + .strip() + ) + + +def get_issue_or_pr_body(number: int) -> str: + """Gets the body of an issue or PR""" + github_token = os.environ.get("GITHUB_TOKEN") + headers = { + "Accept": "application/vnd.github.v3+json", + "Authorization": f"token {github_token}", + } + # Despite the 'issues' in the link, this also works for PRs + url = f"https://api.github.com/repos/pytorch/pytorch/issues/{number}" + with urlopen(Request(url, headers=headers)) as conn: + body: str = json.loads(conn.read().decode())["body"] + return body + + def normalize_ratings( ratings: Dict[TestRun, float], max_value: float ) -> Dict[TestRun, float]: