Handle wrong workflow name from GitHub (#123301)

Fixes https://github.com/pytorch/pytorch/issues/122422.  From my testing, the problem is that GitHub didn't return the correct workflow name in some cases and used the path to the workflow instead.

Take https://github.com/pytorch/pytorch/pull/123104 as an example: the name returned by the GH GraphQL API was `.github/workflows/generated-linux-binary-conda-nightly.yml`, while the name we had on Rockset was `linux-binary-conda`.  The latter was correct, but the mismatch caused mergebot to miss the flaky failures.

This is a weird issue because retrying the graphql query eventually returns the correct name.

First query:
![Screenshot 2024-04-03 at 15 28 37](https://github.com/pytorch/pytorch/assets/475357/81a8ada4-c241-4e6b-b45d-7a6de1c3a151)

After several retries:
![Screenshot 2024-04-03 at 15 31 53](https://github.com/pytorch/pytorch/assets/475357/402c2e8c-f963-45f6-8c10-e1d2f49c5479)

After that, I was never able to reproduce the result of the first query.

The fix here is to keep track of the job ID so that we can compare it instead of the `workflow / job` name.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/123301
Approved by: https://github.com/clee2000
This commit is contained in:
Huy Do
2024-04-04 07:00:40 +00:00
committed by PyTorch MergeBot
parent dbeb214043
commit f00ece024b
4 changed files with 54 additions and 9 deletions

View File

@ -123,6 +123,7 @@ fragment PRCheckSuites on CheckSuiteConnection {
workflow {
name
}
databaseId
url
}
checkRuns(first: 50) {
@ -1618,28 +1619,37 @@ def remove_job_name_suffix(name: str, replacement: str = ")") -> str:
def is_broken_trunk(
name: str,
check: JobCheckState,
drci_classifications: Any,
) -> bool:
if not name or not drci_classifications:
if not check or not drci_classifications:
return False
name = check.name
job_id = check.job_id
# Consult the list of broken trunk failures from Dr.CI
return any(
name == broken_trunk["name"]
(name == broken_trunk["name"]) or (job_id and job_id == broken_trunk["id"])
for broken_trunk in drci_classifications.get("BROKEN_TRUNK", [])
)
def is_flaky(
name: str,
check: JobCheckState,
drci_classifications: Any,
) -> bool:
if not name or not drci_classifications:
if not check or not drci_classifications:
return False
name = check.name
job_id = check.job_id
# Consult the list of flaky failures from Dr.CI
return any(name == flaky["name"] for flaky in drci_classifications.get("FLAKY", []))
return any(
(name == flaky["name"] or (job_id and job_id == flaky["id"]))
for flaky in drci_classifications.get("FLAKY", [])
)
def is_invalid_cancel(
@ -1726,7 +1736,7 @@ def get_classifications(
# NB: It's important to note that when it comes to ghstack and broken trunk classification,
# Dr.CI uses the base of the whole stack
if is_broken_trunk(name, drci_classifications):
if is_broken_trunk(check, drci_classifications):
checks_with_classifications[name] = JobCheckState(
check.name,
check.url,
@ -1738,7 +1748,7 @@ def get_classifications(
)
continue
elif is_flaky(name, drci_classifications):
elif is_flaky(check, drci_classifications):
checks_with_classifications[name] = JobCheckState(
check.name,
check.url,