[BE] Clean up trymerge code handling flaky failures (#110133)

This is the 2nd part of https://github.com/pytorch/pytorch/pull/110054.  The flaky classification has been done on Dr.CI.  There is no need to download flaky rule files and do the check anymore.  Some tests are also updated with new examples because we mocked the list of flaky rules there.  Similar tests have been done on Dr.CI.

* [x] https://github.com/pytorch/pytorch/pull/110054
* [x] Clean up the flaky rules logic because it has already been implemented on Dr.CI
* [ ] Clean up the broken trunk logic for the same reason

Pull Request resolved: https://github.com/pytorch/pytorch/pull/110133
Approved by: https://github.com/clee2000
This commit is contained in:
Huy Do
2023-09-30 08:01:00 +00:00
committed by PyTorch MergeBot
parent f7ba3e85e2
commit 81a74457ca
5 changed files with 52 additions and 116 deletions

View File

@ -74,29 +74,6 @@ class WorkflowCheckState:
self.jobs: JobNameToStateDict = {}
class FlakyRule:
    """A pattern describing a known flaky failure.

    A rule pairs a regex over the job name with a list of regexes over the
    job's failure captures; a job is considered flaky only when the name
    matches and every capture regex is found somewhere in the captures.
    """

    def __init__(self, name: str, captures: List[str]):
        # Compile eagerly so matches() only searches
        self.name = re.compile(name)
        self.captures = [re.compile(r) for r in captures]

    def matches(self, job: Optional[Dict[str, Any]]) -> bool:
        """Return True when job's name and failure captures satisfy this rule."""
        if job is None:
            return False
        if self.name.search(job.get("name", "")) is None:
            return False
        # A job without recorded failure captures can never match
        if job.get("failure_captures") is None:
            return False
        captures = job.get("failure_captures", [])
        # Every capture regex must hit at least one recorded capture
        return all(
            any(pattern.search(capture) is not None for capture in captures)
            for pattern in self.captures
        )

    def __repr__(self) -> str:
        return f"FlakyRule[name='{self.name}', captures={self.captures}]"
GH_PR_REVIEWS_FRAGMENT = """
fragment PRReviews on PullRequestReviewConnection {
nodes {
@ -1261,13 +1238,6 @@ def read_merge_rules(
return [MergeRule(**x) for x in rc]
@lru_cache(maxsize=None)
def read_flaky_rules() -> List[FlakyRule]:
    """Return the list of known flaky rules, cached for the process lifetime."""
    # NOTE: This is currently hardcoded, can be extended to do per repo rules
    flaky_rules_url = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/flaky-rules.json"
    return _get_flaky_rules(flaky_rules_url)
def find_matching_merge_rule(
pr: GitHubPR,
repo: Optional[GitRepo] = None,
@ -1298,7 +1268,6 @@ def find_matching_merge_rule(
reject_reason = f"No rule found to match PR. Please [report]{issue_link} this issue to DevX team."
rules = read_merge_rules(repo, pr.org, pr.project)
flaky_rules = read_flaky_rules()
if not rules:
reject_reason = f"Rejecting the merge as no rules are defined for the repository in {MERGE_RULE_PATH}"
raise RuntimeError(reject_reason)
@ -1318,7 +1287,6 @@ def find_matching_merge_rule(
checks,
pr.last_commit()["oid"],
base_rev,
flaky_rules,
ignore_current_checks=ignore_current_checks,
)
@ -1469,11 +1437,6 @@ def checks_to_markdown_bullets(
]
@retries_decorator(rc=[])
def _get_flaky_rules(url: str) -> List[FlakyRule]:
    """Fetch the flaky-rule JSON list from url and parse each entry.

    NOTE(review): rc=[] presumably makes the decorator return an empty
    list once retries are exhausted — confirm against retries_decorator.
    """
    raw_rules = gh_fetch_json_list(url)
    return [FlakyRule(**raw) for raw in raw_rules]
@retries_decorator()
def save_merge_record(
collection: str,
@ -1620,7 +1583,6 @@ def is_broken_trunk(
def is_flaky(
head_job: Optional[Dict[str, Any]],
flaky_rules: List[FlakyRule],
drci_classifications: Any,
) -> bool:
if not head_job or not drci_classifications:
@ -1630,7 +1592,7 @@ def is_flaky(
return any(
head_job["name"] == flaky["name"]
for flaky in drci_classifications.get("FLAKY", [])
) or any(rule.matches(head_job) for rule in flaky_rules)
)
def get_classifications(
@ -1639,7 +1601,6 @@ def get_classifications(
checks: Dict[str, JobCheckState],
head_sha: str,
merge_base: Optional[str],
flaky_rules: List[FlakyRule],
ignore_current_checks: Optional[List[str]],
) -> Dict[str, JobCheckState]:
# Group by job name without shard id and suffix to correctly identify broken
@ -1716,6 +1677,9 @@ def get_classifications(
name_no_suffix = remove_job_name_suffix(name)
head_sha_job = head_sha_jobs.get(name_no_suffix, {}).get(name)
# NB: It's important to note that when it comes to ghstack and broken trunk classification,
# the current implementation of trymerge uses the base of the current PR in the stack, i.e.
# gh/user/xx/base, while Dr.CI uses the base of the whole stack. Both works though
if is_broken_trunk(head_sha_job, merge_base_jobs.get(name_no_suffix)):
checks_with_classifications[name] = JobCheckState(
check.name,
@ -1727,7 +1691,7 @@ def get_classifications(
)
continue
elif is_flaky(head_sha_job, flaky_rules, drci_classifications):
elif is_flaky(head_sha_job, drci_classifications):
checks_with_classifications[name] = JobCheckState(
check.name, check.url, check.status, "FLAKY", check.job_id, check.title
)
@ -2019,7 +1983,6 @@ def merge(
start_time = time.time()
last_exception = ""
elapsed_time = 0.0
flaky_rules = read_flaky_rules()
ignore_current_checks = [
x[0] for x in ignore_current_checks_info
] # convert to List[str] for convenience
@ -2057,7 +2020,6 @@ def merge(
checks,
pr.last_commit()["oid"],
pr.get_merge_base(),
flaky_rules,
ignore_current_checks=ignore_current_checks,
)
pending, failing, _ = categorize_checks(