[mergebot] Fix pending unstable jobs being viewed as failed (#128080)

https://github.com/pytorch/pytorch/pull/128038#issuecomment-2150802030

In the above, pending unstable jobs get put into the ok_failed_checks list, and because there are a lot of unstable jobs, the list exceeds the threshold and the merge fails.

I don't think unstable jobs should be considered in the ok failed checks threshold, only flaky and broken trunk jobs should be considered there.

The change looks big, but the main point is that unstable jobs are no longer included in the count of how many flaky failures there are. The other changes are mostly renames to make things clearer.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/128080
Approved by: https://github.com/huydhn
This commit is contained in:
Catherine Lee
2024-06-06 18:22:20 +00:00
committed by PyTorch MergeBot
parent 32fb68960e
commit 936225d7b2
2 changed files with 21 additions and 21 deletions

View File

@ -773,13 +773,13 @@ class TestBypassFailures(TestCase):
# than the one on the base commit. This should still count as broken trunk # than the one on the base commit. This should still count as broken trunk
"pr_num": 104214, "pr_num": 104214,
"related_failure_count": 0, "related_failure_count": 0,
"unrelated_failure_count": 1, "flaky_or_broken_trunk": 1,
}, },
{ {
# This PR had one broken trunk failure and it used ghstack # This PR had one broken trunk failure and it used ghstack
"pr_num": 105145, "pr_num": 105145,
"related_failure_count": 0, "related_failure_count": 0,
"unrelated_failure_count": 1, "flaky_or_broken_trunk": 1,
}, },
{ {
# The failure on the merge base was retried successfully and # The failure on the merge base was retried successfully and
@ -788,20 +788,20 @@ class TestBypassFailures(TestCase):
# be used to detect broken trunk # be used to detect broken trunk
"pr_num": 107160, "pr_num": 107160,
"related_failure_count": 0, "related_failure_count": 0,
"unrelated_failure_count": 4, "flaky_or_broken_trunk": 1,
}, },
{ {
# This PR used Dr.CI broken trunk classification # This PR used Dr.CI broken trunk classification
"pr_num": 111253, "pr_num": 111253,
"related_failure_count": 1, "related_failure_count": 1,
"unrelated_failure_count": 2, "flaky_or_broken_trunk": 1,
}, },
] ]
for case in test_cases: for case in test_cases:
pr_num = case["pr_num"] pr_num = case["pr_num"]
related_failure_count = case["related_failure_count"] related_failure_count = case["related_failure_count"]
unrelated_failure_count = case["unrelated_failure_count"] flaky_or_broken_trunk = case["flaky_or_broken_trunk"]
pr = GitHubPR("pytorch", "pytorch", pr_num) pr = GitHubPR("pytorch", "pytorch", pr_num)
checks = pr.get_checkrun_conclusions() checks = pr.get_checkrun_conclusions()
@ -823,7 +823,7 @@ class TestBypassFailures(TestCase):
) )
self.assertTrue(len(pending) == 0) self.assertTrue(len(pending) == 0)
self.assertTrue( self.assertTrue(
len(failed) == unrelated_failure_count + related_failure_count len(failed) == flaky_or_broken_trunk + related_failure_count
) )
def test_ignore_current(self, *args: Any) -> None: def test_ignore_current(self, *args: Any) -> None:

View File

@ -2027,10 +2027,8 @@ def categorize_checks(
pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = [] pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = [] failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
# ok_failed_checks is used with ok_failed_checks_threshold while ignorable_failed_checks # failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
# is used to keep track of all ignorable failures when saving the merge record on Rockset failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)
ok_failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
ignorable_failed_checks: Dict[str, List[Any]] = defaultdict(list)
# If required_checks is not set or empty, consider all names are relevant # If required_checks is not set or empty, consider all names are relevant
relevant_checknames = [ relevant_checknames = [
@ -2058,36 +2056,38 @@ def categorize_checks(
continue continue
elif not is_passing_status(check_runs[checkname].status): elif not is_passing_status(check_runs[checkname].status):
target = ( target = (
ignorable_failed_checks[classification] failed_checks_categorization[classification]
if classification if classification
in ("IGNORE_CURRENT_CHECK", "BROKEN_TRUNK", "FLAKY", "UNSTABLE") in ("IGNORE_CURRENT_CHECK", "BROKEN_TRUNK", "FLAKY", "UNSTABLE")
else failed_checks else failed_checks
) )
target.append((checkname, url, job_id)) target.append((checkname, url, job_id))
if classification in ("BROKEN_TRUNK", "FLAKY", "UNSTABLE"): flaky_or_broken_trunk = (
ok_failed_checks.append((checkname, url, job_id)) failed_checks_categorization["BROKEN_TRUNK"]
+ failed_checks_categorization["FLAKY"]
)
if ok_failed_checks: if flaky_or_broken_trunk:
warn( warn(
f"The following {len(ok_failed_checks)} checks failed but were likely due flakiness or broken trunk: " f"The following {len(flaky_or_broken_trunk)} checks failed but were likely due flakiness or broken trunk: "
+ ", ".join([x[0] for x in ok_failed_checks]) + ", ".join([x[0] for x in flaky_or_broken_trunk])
+ ( + (
f" but this is greater than the threshold of {ok_failed_checks_threshold} so merge will fail" f" but this is greater than the threshold of {ok_failed_checks_threshold} so merge will fail"
if ok_failed_checks_threshold is not None if ok_failed_checks_threshold is not None
and len(ok_failed_checks) > ok_failed_checks_threshold and len(flaky_or_broken_trunk) > ok_failed_checks_threshold
else "" else ""
) )
) )
if ( if (
ok_failed_checks_threshold is not None ok_failed_checks_threshold is not None
and len(ok_failed_checks) > ok_failed_checks_threshold and len(flaky_or_broken_trunk) > ok_failed_checks_threshold
): ):
failed_checks = failed_checks + ok_failed_checks failed_checks = failed_checks + flaky_or_broken_trunk
# The list of ignorable_failed_checks is returned so that it can be saved into the Rockset merge record # The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
return (pending_checks, failed_checks, ignorable_failed_checks) return (pending_checks, failed_checks, failed_checks_categorization)
def merge( def merge(