mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 05:34:18 +08:00
[BE] Clean up trymerge code handling flaky failures (#110133)
This is the 2nd part of https://github.com/pytorch/pytorch/pull/110054. The flaky classification has been done on Dr.CI. There is no need to download flaky rule files and do the check anymore. Some tests are also updated with new examples because we mocked the list of flaky rules there. Similar tests have been done on Dr.CI. * [x] https://github.com/pytorch/pytorch/pull/110054 * [x] Clean up the flaky rules logic because it has already been implemented on Dr. CI * [ ] Clean up the broken trunk logic for the same reason Pull Request resolved: https://github.com/pytorch/pytorch/pull/110133 Approved by: https://github.com/clee2000
This commit is contained in:
48
.github/scripts/trymerge.py
vendored
48
.github/scripts/trymerge.py
vendored
@ -74,29 +74,6 @@ class WorkflowCheckState:
|
||||
self.jobs: JobNameToStateDict = {}
|
||||
|
||||
|
||||
class FlakyRule:
|
||||
def __init__(self, name: str, captures: List[str]):
|
||||
self.name = re.compile(name)
|
||||
self.captures = [re.compile(r) for r in captures]
|
||||
|
||||
def matches(self, job: Optional[Dict[str, Any]]) -> bool:
|
||||
return (
|
||||
job is not None
|
||||
and self.name.search(job.get("name", "")) is not None
|
||||
and job.get("failure_captures") is not None
|
||||
and all(
|
||||
any(
|
||||
r.search(capture) is not None
|
||||
for capture in job.get("failure_captures", [])
|
||||
)
|
||||
for r in self.captures
|
||||
)
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"FlakyRule[name='{self.name}', captures={self.captures}]"
|
||||
|
||||
|
||||
GH_PR_REVIEWS_FRAGMENT = """
|
||||
fragment PRReviews on PullRequestReviewConnection {
|
||||
nodes {
|
||||
@ -1261,13 +1238,6 @@ def read_merge_rules(
|
||||
return [MergeRule(**x) for x in rc]
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def read_flaky_rules() -> List[FlakyRule]:
|
||||
# NOTE: This is currently hardcoded, can be extended to do per repo rules
|
||||
FLAKY_RULES_URL = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/flaky-rules.json"
|
||||
return _get_flaky_rules(FLAKY_RULES_URL)
|
||||
|
||||
|
||||
def find_matching_merge_rule(
|
||||
pr: GitHubPR,
|
||||
repo: Optional[GitRepo] = None,
|
||||
@ -1298,7 +1268,6 @@ def find_matching_merge_rule(
|
||||
reject_reason = f"No rule found to match PR. Please [report]{issue_link} this issue to DevX team."
|
||||
|
||||
rules = read_merge_rules(repo, pr.org, pr.project)
|
||||
flaky_rules = read_flaky_rules()
|
||||
if not rules:
|
||||
reject_reason = f"Rejecting the merge as no rules are defined for the repository in {MERGE_RULE_PATH}"
|
||||
raise RuntimeError(reject_reason)
|
||||
@ -1318,7 +1287,6 @@ def find_matching_merge_rule(
|
||||
checks,
|
||||
pr.last_commit()["oid"],
|
||||
base_rev,
|
||||
flaky_rules,
|
||||
ignore_current_checks=ignore_current_checks,
|
||||
)
|
||||
|
||||
@ -1469,11 +1437,6 @@ def checks_to_markdown_bullets(
|
||||
]
|
||||
|
||||
|
||||
@retries_decorator(rc=[])
|
||||
def _get_flaky_rules(url: str) -> List[FlakyRule]:
|
||||
return [FlakyRule(**rule) for rule in gh_fetch_json_list(url)]
|
||||
|
||||
|
||||
@retries_decorator()
|
||||
def save_merge_record(
|
||||
collection: str,
|
||||
@ -1620,7 +1583,6 @@ def is_broken_trunk(
|
||||
|
||||
def is_flaky(
|
||||
head_job: Optional[Dict[str, Any]],
|
||||
flaky_rules: List[FlakyRule],
|
||||
drci_classifications: Any,
|
||||
) -> bool:
|
||||
if not head_job or not drci_classifications:
|
||||
@ -1630,7 +1592,7 @@ def is_flaky(
|
||||
return any(
|
||||
head_job["name"] == flaky["name"]
|
||||
for flaky in drci_classifications.get("FLAKY", [])
|
||||
) or any(rule.matches(head_job) for rule in flaky_rules)
|
||||
)
|
||||
|
||||
|
||||
def get_classifications(
|
||||
@ -1639,7 +1601,6 @@ def get_classifications(
|
||||
checks: Dict[str, JobCheckState],
|
||||
head_sha: str,
|
||||
merge_base: Optional[str],
|
||||
flaky_rules: List[FlakyRule],
|
||||
ignore_current_checks: Optional[List[str]],
|
||||
) -> Dict[str, JobCheckState]:
|
||||
# Group by job name without shard id and suffix to correctly identify broken
|
||||
@ -1716,6 +1677,9 @@ def get_classifications(
|
||||
name_no_suffix = remove_job_name_suffix(name)
|
||||
head_sha_job = head_sha_jobs.get(name_no_suffix, {}).get(name)
|
||||
|
||||
# NB: It's important to note that when it comes to ghstack and broken trunk classification,
|
||||
# the current implementation of trymerge uses the base of the current PR in the stack, i.e.
|
||||
# gh/user/xx/base, while Dr.CI uses the base of the whole stack. Both works though
|
||||
if is_broken_trunk(head_sha_job, merge_base_jobs.get(name_no_suffix)):
|
||||
checks_with_classifications[name] = JobCheckState(
|
||||
check.name,
|
||||
@ -1727,7 +1691,7 @@ def get_classifications(
|
||||
)
|
||||
continue
|
||||
|
||||
elif is_flaky(head_sha_job, flaky_rules, drci_classifications):
|
||||
elif is_flaky(head_sha_job, drci_classifications):
|
||||
checks_with_classifications[name] = JobCheckState(
|
||||
check.name, check.url, check.status, "FLAKY", check.job_id, check.title
|
||||
)
|
||||
@ -2019,7 +1983,6 @@ def merge(
|
||||
start_time = time.time()
|
||||
last_exception = ""
|
||||
elapsed_time = 0.0
|
||||
flaky_rules = read_flaky_rules()
|
||||
ignore_current_checks = [
|
||||
x[0] for x in ignore_current_checks_info
|
||||
] # convert to List[str] for convenience
|
||||
@ -2057,7 +2020,6 @@ def merge(
|
||||
checks,
|
||||
pr.last_commit()["oid"],
|
||||
pr.get_merge_base(),
|
||||
flaky_rules,
|
||||
ignore_current_checks=ignore_current_checks,
|
||||
)
|
||||
pending, failing, _ = categorize_checks(
|
||||
|
Reference in New Issue
Block a user