Mirror of https://github.com/huggingface/transformers.git
Collated reports (#40080)
* Add initial collated reports script and job definition
* Provide the commit hash for this run; also use the hash in the generated artifact name. JSON formatting
* Tidy
* Add option to upload collated reports to the HF Hub
* Add glob pattern for test report folders
* Fix glob
* Use machine_type as a path filter instead of a glob; include machine_type in the collated report
.github/workflows/collated-reports.yml (new file, vendored, 49 lines)
@@ -0,0 +1,49 @@
name: CI collated reports

on:
  workflow_call:
    inputs:
      job:
        required: true
        type: string
      report_repo_id:
        required: true
        type: string
      machine_type:
        required: true
        type: string
      gpu_name:
        description: Name of the GPU used for the job. It's enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
        required: true
        type: string

jobs:
  collated_reports:
    name: Collated reports
    runs-on: ubuntu-22.04
    if: always()
    steps:
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4

      - name: Collated reports
        shell: bash
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_SHA: ${{ github.sha }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          pip install huggingface_hub
          python3 utils/collated_reports.py \
            --path /transformers/reports/ \
            --machine-type ${{ inputs.machine_type }} \
            --commit-hash ${{ env.CI_SHA }} \
            --job ${{ inputs.job }} \
            --report-repo-id ${{ inputs.report_repo_id }} \
            --gpu-name ${{ inputs.gpu_name }}

      - name: Upload collated reports
        uses: actions/upload-artifact@v4
        with:
          name: collated_reports_${{ env.CI_SHA }}.json
          path: collated_reports_${{ env.CI_SHA }}.json
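For debugging outside CI, the invocation performed by the "Collated reports" step above can be reproduced locally. A minimal sketch, assuming the test report folders have already been downloaded into a local reports/ directory (all values below are illustrative placeholders, not taken from this PR):

    # Reproduce the workflow's "Collated reports" step locally (illustrative values).
    import subprocess

    subprocess.run(
        [
            "python3", "utils/collated_reports.py",
            "--path", "reports/",                    # downloaded test report folders
            "--machine-type", "single-gpu",          # or "multi-gpu"
            "--gpu-name", "noise-h100-more-noise",   # any string containing the GPU name
            # --commit-hash is optional: the script falls back to `git rev-parse HEAD`;
            # --job and --report-repo-id are only needed when uploading to the Hub.
        ],
        check=True,
    )

Leaving out --job and --report-repo-id skips the Hub upload, so the sketch can run without any CI tokens.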
utils/collated_reports.py (new file, 219 lines)
@@ -0,0 +1,219 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
import subprocess
from dataclasses import dataclass
from pathlib import Path


DEFAULT_GPU_NAMES = ["mi300", "mi325", "mi355", "h100", "a10"]


def simplify_gpu_name(gpu_name: str, simplified_names: list[str]) -> str:
    # Reduce a noisy GPU name to a known short name when exactly one matches
    matches = []
    for simplified_name in simplified_names:
        if simplified_name in gpu_name:
            matches.append(simplified_name)
    if len(matches) == 1:
        return matches[0]
    return gpu_name


def parse_short_summary_line(line: str) -> tuple[str | None, int]:
    # Lines follow pytest's short test summary format, e.g.
    # "PASSED tests/foo.py::test_bar" or "SKIPPED [3] tests/foo.py: reason";
    # SKIPPED entries carry their count in brackets.
    if line.startswith("PASSED"):
        return "passed", 1
    if line.startswith("FAILED"):
        return "failed", 1
    if line.startswith("SKIPPED"):
        line = line.split("[", maxsplit=1)[1]
        line = line.split("]", maxsplit=1)[0]
        return "skipped", int(line)
    if line.startswith("ERROR"):
        return "error", 1
    return None, 0


def validate_path(p: str) -> Path:
    # Validate that the given path is an existing directory
    path = Path(p)
    assert path.is_dir(), f"Path {path} is not a directory"
    return path


def get_gpu_name(gpu_name: str | None) -> str:
    # Get GPU name if available
    if gpu_name is None:
        try:
            import torch

            gpu_name = torch.cuda.get_device_name()
        except Exception as e:
            print(f"Failed to get GPU name with {e}")
            gpu_name = "unknown"
    else:
        gpu_name = gpu_name.replace(" ", "_").lower()
        gpu_name = simplify_gpu_name(gpu_name, DEFAULT_GPU_NAMES)

    return gpu_name


def get_commit_hash(commit_hash: str | None) -> str:
    # Get commit hash if available
    if commit_hash is None:
        try:
            commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip()
        except Exception as e:
            print(f"Failed to get commit hash with {e}")
            commit_hash = "unknown"

    return commit_hash[:7]


@dataclass
class Args:
    path: Path
    machine_type: str
    gpu_name: str
    commit_hash: str
    job: str | None
    report_repo_id: str | None


def get_arguments(args: argparse.Namespace) -> Args:
    path = validate_path(args.path)
    machine_type = args.machine_type
    gpu_name = get_gpu_name(args.gpu_name)
    commit_hash = get_commit_hash(args.commit_hash)
    job = args.job
    report_repo_id = args.report_repo_id
    return Args(path, machine_type, gpu_name, commit_hash, job, report_repo_id)


def upload_collated_report(job: str, report_repo_id: str, filename: str):
    # Alternatively we can check for the existence of the collated_reports file and upload in notification_service.py
    import os

    from get_previous_daily_ci import get_last_daily_ci_run
    from huggingface_hub import HfApi

    api = HfApi()

    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
    report_repo_subfolder = ""
    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
        report_repo_subfolder = f"runs/{report_repo_subfolder}"

    workflow_run = get_last_daily_ci_run(
        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
    )
    workflow_run_created_time = workflow_run["created_at"]
    report_repo_folder = workflow_run_created_time.split("T")[0]

    if report_repo_subfolder:
        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

    api.upload_file(
        path_or_fileobj=f"ci_results_{job}/{filename}",
        path_in_repo=f"{report_repo_folder}/ci_results_{job}/{filename}",
        repo_id=report_repo_id,
        repo_type="dataset",
        token=os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN"),
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Post process models test reports.")
    parser.add_argument("--path", "-p", help="Path to the reports folder")
    parser.add_argument(
        "--machine-type", "-m", help="Process single or multi GPU results", choices=["single-gpu", "multi-gpu"]
    )
    parser.add_argument("--gpu-name", "-g", help="GPU name", default=None)
    parser.add_argument("--commit-hash", "-c", help="Commit hash", default=None)
    parser.add_argument("--job", "-j", help="Optional job name required for uploading reports", default=None)
    parser.add_argument(
        "--report-repo-id", "-r", help="Optional report repository ID required for uploading reports", default=None
    )
    args = get_arguments(parser.parse_args())

    # Initialize accumulators for collated report
    total_status_count = {
        "passed": 0,
        "failed": 0,
        "skipped": 0,
        "error": 0,
        None: 0,
    }
    collated_report_buffer = []

    path = args.path
    machine_type = args.machine_type
    gpu_name = args.gpu_name
    commit_hash = args.commit_hash
    job = args.job
    report_repo_id = args.report_repo_id

    # Find the origin directory based on machine type
    origin = path
    for p in path.iterdir():
        if machine_type in p.name:
            origin = p
            break

    # Loop through model directories and create collated reports
    for model_dir in sorted(origin.iterdir()):
        # Create a new entry for the model
        model_name = model_dir.name.removesuffix("_test_reports")
        report = {"model": model_name, "results": []}
        results = []

        # Read short summary
        with open(model_dir / "summary_short.txt", "r") as f:
            short_summary_lines = f.readlines()

        # Parse short summary, skipping the header line
        for line in short_summary_lines[1:]:
            status, count = parse_short_summary_line(line)
            total_status_count[status] += count
            if status:
                result = {
                    "status": status,
                    "test": line.split(status.upper(), maxsplit=1)[1].strip(),
                    "count": count,
                }
                results.append(result)

        # Add short summaries to report
        report["results"] = results

        collated_report_buffer.append(report)

    # Write collated report
    with open(f"collated_reports_{commit_hash}.json", "w") as f:
        json.dump(
            {
                "gpu_name": gpu_name,
                "machine_type": machine_type,
                "commit_hash": commit_hash,
                "total_status_count": total_status_count,
                "results": collated_report_buffer,
            },
            f,
            indent=2,
        )

    if job and report_repo_id:
        upload_collated_report(job, report_repo_id, f"collated_reports_{commit_hash}.json")
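The two pure helpers are easy to sanity-check in isolation. A minimal sketch, run from the utils/ directory so the module resolves; the summary lines follow pytest's short-summary format and are made up for illustration:

    # Quick check of the parsing and GPU-name helpers (sample data is illustrative).
    from collated_reports import DEFAULT_GPU_NAMES, parse_short_summary_line, simplify_gpu_name

    print(parse_short_summary_line("PASSED tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward"))
    # ("passed", 1)
    print(parse_short_summary_line("SKIPPED [3] tests/models/bert/test_modeling_bert.py: requires CUDA"))
    # ("skipped", 3) -- the bracketed count is preserved
    print(parse_short_summary_line("=== 3 passed in 1.2s ==="))
    # (None, 0) -- unrecognized lines land in the None bucket of total_status_count

    print(simplify_gpu_name("noise-h100-more-noise", DEFAULT_GPU_NAMES))
    # "h100" -- exactly one known name matched; ambiguous or unknown names are returned unchanged

The None key in total_status_count exists so that unrecognized summary lines are tallied (with a count of 0) rather than raising a KeyError, and the collated report is written to collated_reports_<short-hash>.json in the working directory.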