CI workflow for reporting test regressions (#39198)

* WIP script to compare test runs for models

* Update line normalization logic

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Ákos Hadnagy
2025-07-16 04:20:02 +02:00
committed by GitHub
parent 1bc9ac5107
commit 0dc2df5dda
3 changed files with 183 additions and 13 deletions

View File

@@ -0,0 +1,91 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re


def normalize_test_line(line):
    """Reduce a pytest summary line to a stable form that can be compared across runs."""
    line = line.strip()

    # Collapse SKIPPED/XFAIL/XPASS/EXPECTEDFAIL lines to "STATUS path:line",
    # dropping bracketed counts and the free-form reason text.
    match = re.match(r"^(SKIPPED|XFAIL|XPASS|EXPECTEDFAIL)\s+\[?\d*\]?\s*(\S+:\d+)", line)
    if match:
        status, location = match.groups()
        return f"{status} {location}"

    # For ERROR/FAILED lines, drop the optional " - <error message>" suffix.
    if line.startswith("ERROR") or line.startswith("FAILED"):
        return re.split(r"\s+-\s+", line)[0].strip()

    return line


def parse_summary_file(file_path):
    """Collect the normalized test lines from a pytest summary_short.txt report."""
    test_set = set()
    with open(file_path, "r", encoding="utf-8") as f:
        in_summary = False
        for line in f:
            # Lines of "=" delimit the short test summary section; toggle on/off.
            if line.strip().startswith("==="):
                in_summary = not in_summary
                continue
            if in_summary:
                stripped = line.strip()
                if stripped:
                    normalized = normalize_test_line(stripped)
                    test_set.add(normalized)
    return test_set


def compare_job_sets(job_set1, job_set2):
    """Diff two {job name -> summary file path} mappings and render a plain-text report."""
    all_job_names = sorted(set(job_set1) | set(job_set2))
    report_lines = []
    for job_name in all_job_names:
        file1 = job_set1.get(job_name)
        file2 = job_set2.get(job_name)
        tests1 = parse_summary_file(file1) if file1 else set()
        tests2 = parse_summary_file(file2) if file2 else set()
        added = tests2 - tests1
        removed = tests1 - tests2
        if added or removed:
            report_lines.append(f"=== Diff for job: {job_name} ===")
            if removed:
                report_lines.append("--- Absent in current run:")
                for test in sorted(removed):
                    report_lines.append(f"  - {test}")
            if added:
                report_lines.append("+++ Appeared in current run:")
                for test in sorted(added):
                    report_lines.append(f"  + {test}")
            report_lines.append("")  # blank line between job diffs
    return "\n".join(report_lines) if report_lines else "No differences found."


# Example usage:
# job_set_1 = {
#     "albert": "prev/multi-gpu_run_models_gpu_models/albert_test_reports/summary_short.txt",
#     "bloom": "prev/multi-gpu_run_models_gpu_models/bloom_test_reports/summary_short.txt",
# }
#
# job_set_2 = {
#     "albert": "curr/multi-gpu_run_models_gpu_models/albert_test_reports/summary_short.txt",
#     "bloom": "curr/multi-gpu_run_models_gpu_models/bloom_test_reports/summary_short.txt",
# }
#
# report = compare_job_sets(job_set_1, job_set_2)
# print(report)
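
To make the normalization concrete, here is a minimal sketch of what normalize_test_line keeps and discards; the pytest summary lines below are invented for illustration, not taken from a real report:

from compare_test_runs import normalize_test_line

# Bracketed counts and the skip reason are stripped; only status and path:line remain.
print(normalize_test_line("SKIPPED [2] tests/models/albert/test_modeling_albert.py:42: requires CUDA"))
# -> SKIPPED tests/models/albert/test_modeling_albert.py:42

# For FAILED/ERROR lines, everything after the " - " separator (the message) is dropped,
# so the same failure matches across runs even if the assertion text changes.
print(normalize_test_line("FAILED tests/models/bloom/test_modeling_bloom.py::BloomModelTest::test_forward - AssertionError: 1 != 2"))
# -> FAILED tests/models/bloom/test_modeling_bloom.py::BloomModelTest::test_forward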

View File

@@ -91,7 +91,12 @@ def get_last_daily_ci_run_commit(token, workflow_run_id=None, workflow_id=None,
 def get_last_daily_ci_artifacts(
-    artifact_names, output_dir, token, workflow_run_id=None, workflow_id=None, commit_sha=None
+    output_dir,
+    token,
+    workflow_run_id=None,
+    workflow_id=None,
+    commit_sha=None,
+    artifact_names=None,
 ):
     """Get the artifacts of last completed workflow run id of the scheduled (daily) CI."""
     workflow_run_id = get_last_daily_ci_workflow_run_id(
@@ -99,37 +104,56 @@ def get_last_daily_ci_artifacts(
     )

     if workflow_run_id is not None:
         artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token)
+        if artifact_names is None:
+            artifact_names = artifacts_links.keys()
+
+        downloaded_artifact_names = []
         for artifact_name in artifact_names:
             if artifact_name in artifacts_links:
                 artifact_url = artifacts_links[artifact_name]
                 download_artifact(
                     artifact_name=artifact_name, artifact_url=artifact_url, output_dir=output_dir, token=token
                 )
+                downloaded_artifact_names.append(artifact_name)
+
+        return downloaded_artifact_names


 def get_last_daily_ci_reports(
-    artifact_names, output_dir, token, workflow_run_id=None, workflow_id=None, commit_sha=None
+    output_dir,
+    token,
+    workflow_run_id=None,
+    workflow_id=None,
+    commit_sha=None,
+    artifact_names=None,
 ):
     """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI."""
-    get_last_daily_ci_artifacts(
-        artifact_names,
+    downloaded_artifact_names = get_last_daily_ci_artifacts(
         output_dir,
         token,
         workflow_run_id=workflow_run_id,
         workflow_id=workflow_id,
         commit_sha=commit_sha,
+        artifact_names=artifact_names,
     )

     results = {}
-    for artifact_name in artifact_names:
+    for artifact_name in downloaded_artifact_names:
         artifact_zip_path = os.path.join(output_dir, f"{artifact_name}.zip")
         if os.path.isfile(artifact_zip_path):
-            results[artifact_name] = {}
+            target_dir = os.path.join(output_dir, artifact_name)
             with zipfile.ZipFile(artifact_zip_path) as z:
-                for filename in z.namelist():
-                    if not os.path.isdir(filename):
-                        # read the file
-                        with z.open(filename) as f:
-                            results[artifact_name][filename] = f.read().decode("UTF-8")
+                z.extractall(target_dir)
+
+            results[artifact_name] = {}
+            filenames = os.listdir(target_dir)
+            for filename in filenames:
+                file_path = os.path.join(target_dir, filename)
+                if not os.path.isdir(file_path):
+                    # read the file
+                    with open(file_path) as fp:
+                        content = fp.read()
+                    results[artifact_name][filename] = content

     return results
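
The net effect of the change above: artifact_names is now an optional keyword where None means "download every artifact the run produced", and the function returns the list of names it actually fetched, so get_last_daily_ci_reports no longer has to guess which zips exist. A minimal sketch of the two call patterns; the run id and artifact name below are placeholders, not values from this commit:

import os

from get_previous_daily_ci import get_last_daily_ci_artifacts

token = os.environ["ACCESS_REPO_INFO_TOKEN"]

# Targeted fetch: download a single named artifact, as the per-job report path does.
downloaded = get_last_daily_ci_artifacts(
    output_dir="previous_reports",
    token=token,
    workflow_run_id="1234567890",  # placeholder run id
    artifact_names=["ci_results_run_models_gpu"],  # placeholder artifact name
)

# Fetch-all: with artifact_names=None, everything the run uploaded is downloaded.
all_downloaded = get_last_daily_ci_artifacts(
    output_dir="previous_reports",
    token=token,
    workflow_run_id="1234567890",  # placeholder run id
)
print(downloaded, len(all_downloaded))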

View File

@@ -24,6 +24,7 @@ import time
 from typing import Any, Optional, Union

 import requests
+from compare_test_runs import compare_job_sets
 from get_ci_error_statistics import get_jobs
 from get_previous_daily_ci import get_last_daily_ci_reports, get_last_daily_ci_run, get_last_daily_ci_workflow_run_id
 from huggingface_hub import HfApi
@@ -672,6 +673,21 @@ class Message:
             }
             blocks.append(block)

+        if diff_file_url is not None:
+            block = {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": f"*Test results diff*\n\n(compared to previous run: <https://github.com/huggingface/transformers/actions/runs/{prev_workflow_run_id}|{prev_workflow_run_id}>)",
+                },
+                "accessory": {
+                    "type": "button",
+                    "text": {"type": "plain_text", "text": "Check test result diff file"},
+                    "url": diff_file_url,
+                },
+            }
+            blocks.append(block)
+
         if len(new_failure_blocks) > 0:
             blocks.extend(new_failure_blocks)
@@ -1460,13 +1476,14 @@ if __name__ == "__main__":
     prev_ci_artifacts = (None, None)
     other_ci_artifacts = []

+    output_dir = os.path.join(os.getcwd(), "previous_reports")
+    os.makedirs(output_dir, exist_ok=True)
+
     for idx, target_workflow_run_id in enumerate([prev_workflow_run_id] + other_workflow_run_ids):
         if target_workflow_run_id is None or target_workflow_run_id == "":
             continue
         else:
             artifact_names = [f"ci_results_{job_name}"]
-            output_dir = os.path.join(os.getcwd(), "previous_reports")
-            os.makedirs(output_dir, exist_ok=True)
             ci_artifacts = get_last_daily_ci_reports(
                 artifact_names=artifact_names,
                 output_dir=output_dir,
@@ -1478,6 +1495,44 @@ if __name__ == "__main__":
         else:
             other_ci_artifacts.append((target_workflow_run_id, ci_artifacts))

+    # Only for AMD at this moment.
+    # TODO: put this into a method
+    if is_amd_daily_ci_workflow:
+        diff_file_url = None
+        if not (prev_workflow_run_id is None or prev_workflow_run_id == ""):
+            ci_artifacts = get_last_daily_ci_reports(
+                artifact_names=None,
+                output_dir=output_dir,
+                token=os.environ["ACCESS_REPO_INFO_TOKEN"],
+                workflow_run_id=prev_workflow_run_id,
+            )
+
+            current_artifacts = sorted([d for d in os.listdir() if os.path.isdir(d) and d.endswith("_test_reports")])
+            prev_artifacts = sorted([d for d in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, d)) and d.endswith("_test_reports")])  # fmt: skip
+
+            current_artifacts_set = {}
+            for d in current_artifacts:
+                current_artifacts_set[d] = os.path.join(d, "summary_short.txt")
+
+            prev_artifacts_set = {}
+            for d in prev_artifacts:
+                prev_artifacts_set[d] = os.path.join(output_dir, d, "summary_short.txt")
+
+            report = compare_job_sets(prev_artifacts_set, current_artifacts_set)
+
+            with open(f"ci_results_{job_name}/test_results_diff.json", "w") as fp:
+                fp.write(report)
+
+            # upload
+            commit_info = api.upload_file(
+                path_or_fileobj=f"ci_results_{job_name}/test_results_diff.json",
+                path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/test_results_diff.json",
+                repo_id=report_repo_id,
+                repo_type="dataset",
+                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+            )
+            diff_file_url = f"https://huggingface.co/datasets/{report_repo_id}/resolve/{commit_info.oid}/{report_repo_folder}/ci_results_{job_name}/test_results_diff.json"

     ci_name_in_report = ""
     if job_name in job_to_test_map:
         ci_name_in_report = job_to_test_map[job_name]
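
End to end, the AMD-only branch above downloads every artifact from the previous scheduled run, pairs each extracted *_test_reports directory with its counterpart from the current run, and uploads the output of compare_job_sets as test_results_diff.json (despite the extension, the file holds the plain-text report, not JSON). Assuming a single regressed model, the uploaded file would look roughly like this; the job name and test ids are invented for illustration:

=== Diff for job: albert_test_reports ===
--- Absent in current run:
  - FAILED tests/models/albert/test_modeling_albert.py::AlbertModelTest::test_resize_embeddings
+++ Appeared in current run:
  + FAILED tests/models/albert/test_modeling_albert.py::AlbertModelTest::test_forward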