mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Handle wrong workflow name from GitHub (#123301)
Fixes https://github.com/pytorch/pytorch/issues/122422. From my testing, the problem is that GitHub didn't return the correct workflow name in some cases and used the path to the workflow instead. Take https://github.com/pytorch/pytorch/pull/123104 as an example, the returning name from GH graphql was `.github/workflows/generated-linux-binary-conda-nightly.yml` while the name we had on Rockset was `linux-binary-conda`. The latter was correct, but the mismatch caused mergebot to miss the flaky failures. This is a weird issue because retrying the graphql query eventually returns the correct name. First query:  After several retries:  Then I could never get the result like the first query again. The fix here is to keep track of the job ID so that we can compare it instead of the `workflow / job` name. Pull Request resolved: https://github.com/pytorch/pytorch/pull/123301 Approved by: https://github.com/clee2000
This commit is contained in:
BIN
.github/scripts/drci_mocks.json.gz
vendored
BIN
.github/scripts/drci_mocks.json.gz
vendored
Binary file not shown.
BIN
.github/scripts/gql_mocks.json.gz
vendored
BIN
.github/scripts/gql_mocks.json.gz
vendored
Binary file not shown.
37
.github/scripts/test_trymerge.py
vendored
37
.github/scripts/test_trymerge.py
vendored
@ -397,7 +397,7 @@ class TestTryMerge(TestCase):
|
||||
def test_gql_retrieve_checksuites(self, *args: Any) -> None:
|
||||
"Fetch comments and conclusions for PR with 60 commits"
|
||||
pr = GitHubPR("pytorch", "pytorch", 94787)
|
||||
self.assertEqual(len(pr.get_checkrun_conclusions()), 183)
|
||||
self.assertEqual(len(pr.get_checkrun_conclusions()), 182)
|
||||
|
||||
def test_team_members(self, *args: Any) -> None:
|
||||
"Test fetching team members works"
|
||||
@ -832,6 +832,41 @@ class TestBypassFailures(TestCase):
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 4)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 2)
|
||||
|
||||
def test_get_classifications_wrong_workflow_name(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 123104)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
|
||||
check_name = "linux-binary-conda / conda-py3_8-cuda11_8-build / build"
|
||||
check_name_workflow_path = ".github/workflows/generated-linux-binary-conda-nightly.yml / conda-py3_8-cuda11_8-build / build"
|
||||
|
||||
# Mock a check where the workflow name uses the full path
|
||||
checks[check_name_workflow_path] = JobCheckState(
|
||||
check_name_workflow_path,
|
||||
checks[check_name].url,
|
||||
checks[check_name].status,
|
||||
checks[check_name].classification,
|
||||
checks[check_name].job_id,
|
||||
checks[check_name].title,
|
||||
checks[check_name].summary,
|
||||
)
|
||||
del checks[check_name]
|
||||
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(
|
||||
checks,
|
||||
list(checks.keys()),
|
||||
)
|
||||
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 1)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 0)
|
||||
|
||||
@mock.patch("trymerge.read_merge_rules", side_effect=xla_merge_rules)
|
||||
def test_dont_ignore_flaky_failures(self, *args: Any) -> None:
|
||||
"""
|
||||
|
26
.github/scripts/trymerge.py
vendored
26
.github/scripts/trymerge.py
vendored
@ -123,6 +123,7 @@ fragment PRCheckSuites on CheckSuiteConnection {
|
||||
workflow {
|
||||
name
|
||||
}
|
||||
databaseId
|
||||
url
|
||||
}
|
||||
checkRuns(first: 50) {
|
||||
@ -1618,28 +1619,37 @@ def remove_job_name_suffix(name: str, replacement: str = ")") -> str:
|
||||
|
||||
|
||||
def is_broken_trunk(
|
||||
name: str,
|
||||
check: JobCheckState,
|
||||
drci_classifications: Any,
|
||||
) -> bool:
|
||||
if not name or not drci_classifications:
|
||||
if not check or not drci_classifications:
|
||||
return False
|
||||
|
||||
name = check.name
|
||||
job_id = check.job_id
|
||||
|
||||
# Consult the list of broken trunk failures from Dr.CI
|
||||
return any(
|
||||
name == broken_trunk["name"]
|
||||
(name == broken_trunk["name"]) or (job_id and job_id == broken_trunk["id"])
|
||||
for broken_trunk in drci_classifications.get("BROKEN_TRUNK", [])
|
||||
)
|
||||
|
||||
|
||||
def is_flaky(
|
||||
name: str,
|
||||
check: JobCheckState,
|
||||
drci_classifications: Any,
|
||||
) -> bool:
|
||||
if not name or not drci_classifications:
|
||||
if not check or not drci_classifications:
|
||||
return False
|
||||
|
||||
name = check.name
|
||||
job_id = check.job_id
|
||||
|
||||
# Consult the list of flaky failures from Dr.CI
|
||||
return any(name == flaky["name"] for flaky in drci_classifications.get("FLAKY", []))
|
||||
return any(
|
||||
(name == flaky["name"] or (job_id and job_id == flaky["id"]))
|
||||
for flaky in drci_classifications.get("FLAKY", [])
|
||||
)
|
||||
|
||||
|
||||
def is_invalid_cancel(
|
||||
@ -1726,7 +1736,7 @@ def get_classifications(
|
||||
|
||||
# NB: It's important to note that when it comes to ghstack and broken trunk classification,
|
||||
# Dr.CI uses the base of the whole stack
|
||||
if is_broken_trunk(name, drci_classifications):
|
||||
if is_broken_trunk(check, drci_classifications):
|
||||
checks_with_classifications[name] = JobCheckState(
|
||||
check.name,
|
||||
check.url,
|
||||
@ -1738,7 +1748,7 @@ def get_classifications(
|
||||
)
|
||||
continue
|
||||
|
||||
elif is_flaky(name, drci_classifications):
|
||||
elif is_flaky(check, drci_classifications):
|
||||
checks_with_classifications[name] = JobCheckState(
|
||||
check.name,
|
||||
check.url,
|
||||
|
Reference in New Issue
Block a user