Benchmarking v2 GH workflows (#40716)
* WIP benchmark v2 workflow
* Container was missing
* Change to sandbox branch name
* Wrong place for image name
* Variable declarations
* Remove references to file logging
* Remove unnecessary step
* Fix deps install
* Syntax
* Add workdir
* Add upload feature
* typo
* No need for hf_transfer
* Pass in runner
* Runner config
* Runner config
* Runner config
* Runner config
* Runner config
* mi325 caller
* Name workflow runs properly
* Copy-paste error
* Add final repo IDs and schedule
* Review comments
* Remove wf params
* Remove parametrization from workflow files
* Fix callers
* Change push trigger to pull_request + label
* Add back schedule event
* Push to the same dataset
* Simplify parameter description
.github/workflows/benchmark_v2.yml (new file, +82)

```yaml
name: Benchmark v2 Framework

on:
  workflow_call:
    inputs:
      runner:
        description: 'GH Actions runner group to use'
        required: true
        type: string
      commit_sha:
        description: 'Commit SHA to benchmark'
        required: false
        type: string
        default: ''
      upload_to_hub:
        description: 'Whether to upload results to a HuggingFace Dataset'
        required: false
        type: string
        default: 'false'
      run_id:
        description: 'Custom run ID for organizing results (auto-generated if not provided)'
        required: false
        type: string
        default: ''
      benchmark_repo_id:
        description: 'HuggingFace Dataset to upload results to (e.g., "org/benchmark-results")'
        required: false
        type: string
        default: ''

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
  benchmark-v2:
    name: Benchmark v2
    runs-on: ${{ inputs.runner }}
    if: |
      (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-benchmark')) ||
      (github.event_name == 'schedule')
    container:
      image: huggingface/transformers-pytorch-gpu
      options: --gpus all --privileged --ipc host --shm-size "16gb"
    steps:
      - name: Get repo
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.commit_sha || github.sha }}

      - name: Install benchmark dependencies
        run: |
          python3 -m pip install -r benchmark_v2/requirements.txt

      - name: Reinstall transformers in edit mode
        run: |
          python3 -m pip uninstall -y transformers
          python3 -m pip install -e ".[torch]"

      - name: Show installed libraries and their versions
        run: |
          python3 -m pip list
          python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')"
          python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
          python3 -c "import torch; print(f'CUDA device count: {torch.cuda.device_count()}')" || true
          nvidia-smi || true

      - name: Run benchmark v2
        working-directory: benchmark_v2
        run: |
          echo "Running benchmarks"
          python3 run_benchmarks.py \
            --commit-id '${{ inputs.commit_sha || github.sha }}' \
            --upload-to-hub '${{ inputs.upload_to_hub || false }}' \
            --run-id '${{ inputs.run_id }}' \
            --benchmark-repo-id '${{ inputs.benchmark_repo_id }}' \
            --log-level INFO
        env:
          HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
```
.github/workflows/benchmark_v2_a10_caller.yml (new file, +20)

```yaml
name: Benchmark v2 Scheduled Runner - A10 Single-GPU

on:
  schedule:
    # Run daily at 16:30 UTC
    - cron: "30 16 * * *"
  pull_request:
    types: [ opened, labeled, reopened, synchronize ]

jobs:
  benchmark-v2-default:
    name: Benchmark v2 - Default Models
    uses: ./.github/workflows/benchmark_v2.yml
    with:
      runner: aws-g5-4xlarge-cache-use1-public-80
      commit_sha: ${{ github.sha }}
      upload_to_hub: true
      run_id: ${{ github.run_id }}
      benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
    secrets: inherit
```
.github/workflows/benchmark_v2_mi325_caller.yml (new file, +20)

```yaml
name: Benchmark v2 Scheduled Runner - MI325 Single-GPU

on:
  schedule:
    # Run daily at 16:30 UTC
    - cron: "30 16 * * *"
  pull_request:
    types: [ opened, labeled, reopened, synchronize ]

jobs:
  benchmark-v2-default:
    name: Benchmark v2 - Default Models
    uses: ./.github/workflows/benchmark_v2.yml
    with:
      runner: amd-mi325-ci-1gpu
      commit_sha: ${{ github.sha }}
      upload_to_hub: true
      run_id: ${{ github.run_id }}
      benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
    secrets: inherit
```
````diff
@@ -21,6 +21,36 @@ python run_benchmarks.py \
     --num-tokens-to-generate 200
 ```
 
+### Uploading Results to a HuggingFace Dataset
+
+You can automatically upload benchmark results to a HuggingFace Dataset for tracking and analysis:
+
+```bash
+# Upload to a public dataset with an auto-generated run ID
+python run_benchmarks.py --upload-to-hub username/benchmark-results
+
+# Upload with a custom run ID for easy identification
+python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1
+```
+
+**Dataset Directory Structure:**
+```
+dataset_name/
+├── 2025-01-15/
+│   ├── runs/                                # Non-scheduled runs (manual, PR, etc.)
+│   │   └── 123-1245151651/                  # GitHub run number and ID
+│   │       └── benchmark_results/
+│   │           ├── benchmark_summary_20250115_143022.json
+│   │           └── model-name/
+│   │               └── model-name_benchmark_20250115_143022.json
+│   └── benchmark_results_abc123de/          # Scheduled runs (daily CI)
+│       ├── benchmark_summary_20250115_143022.json
+│       └── model-name/
+│           └── model-name_benchmark_20250115_143022.json
+└── 2025-01-16/
+    └── ...
+```
+
 ### Running Specific Benchmarks
 
 ```bash
````
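For reference, a minimal sketch of reading results back out of the dataset layout documented above, assuming `huggingface_hub` is installed; the dataset name and date folder are placeholders, not values from this PR:

```python
# Sketch: download benchmark summaries from the dataset and print headline counts.
# "username/benchmark-results" and "2025-01-15" are illustrative placeholders.
import json
from pathlib import Path

from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="username/benchmark-results",
    repo_type="dataset",
    allow_patterns=["2025-01-15/**/benchmark_summary_*.json"],
)

for summary_path in Path(local_dir).rglob("benchmark_summary_*.json"):
    summary = json.loads(summary_path.read_text())
    meta = summary.get("run_metadata", {})
    print(summary_path.name, meta.get("total_benchmarks"), meta.get("failed_benchmarks"))
```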
```diff
@@ -20,7 +20,6 @@ import torch
 from benchmark_framework import ModelBenchmark
 
 
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "1"
 torch.set_float32_matmul_precision("high")
 
```
```diff
@@ -3,4 +3,5 @@ psutil>=5.8.0
 gpustat>=1.0.0
 torch>=2.0.0
 transformers>=4.30.0
 datasets>=2.10.0
+huggingface_hub>=0.16.0
```
```diff
@@ -24,6 +24,7 @@ import json
 import logging
 import os
 import sys
+import uuid
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional
```
```diff
@@ -160,7 +161,12 @@ def run_single_benchmark(
     return None
 
 
-def generate_summary_report(output_dir: str, benchmark_results: dict[str, Any], logger: logging.Logger) -> str:
+def generate_summary_report(
+    output_dir: str,
+    benchmark_results: dict[str, Any],
+    logger: logging.Logger,
+    benchmark_run_uuid: Optional[str] = None,
+) -> str:
     """Generate a summary report of all benchmark runs."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.json")
```
```diff
@@ -168,6 +174,7 @@ def generate_summary_report(output_dir: str, benchmark_results: dict[str, Any],
     summary_data = {
         "run_metadata": {
             "timestamp": datetime.utcnow().isoformat(),
+            "benchmark_run_uuid": benchmark_run_uuid,
             "total_benchmarks": len(benchmark_results),
             "successful_benchmarks": len([r for r in benchmark_results.values() if r is not None]),
             "failed_benchmarks": len([r for r in benchmark_results.values() if r is None]),
```
```diff
@@ -183,9 +190,115 @@ def generate_summary_report(output_dir: str, benchmark_results: dict[str, Any],
     return summary_file
 
 
+def upload_results_to_hf_dataset(
+    output_dir: str,
+    summary_file: str,
+    dataset_name: str,
+    run_id: Optional[str] = None,
+    logger: Optional[logging.Logger] = None,
+) -> Optional[str]:
+    """
+    Upload benchmark results to a HuggingFace Dataset.
+
+    Based on upload_collated_report() from utils/collated_reports.py
+
+    Args:
+        output_dir: Local output directory containing results
+        summary_file: Path to the summary file
+        dataset_name: Name of the HuggingFace dataset to upload to
+        run_id: Unique run identifier (if None, will generate one)
+        logger: Logger instance
+
+    Returns:
+        The run_id used for the upload, None if upload failed
+    """
+    if logger is None:
+        logger = logging.getLogger(__name__)
+
+    import os
+
+    from huggingface_hub import HfApi
+
+    api = HfApi()
+
+    if run_id is None:
+        github_run_number = os.getenv("GITHUB_RUN_NUMBER")
+        github_run_id = os.getenv("GITHUB_RUN_ID")
+        if github_run_number and github_run_id:
+            run_id = f"{github_run_number}-{github_run_id}"
+
+    date_folder = datetime.now().strftime("%Y-%m-%d")
+
+    github_event_name = os.getenv("GITHUB_EVENT_NAME")
+    if github_event_name != "schedule":
+        # Non-scheduled runs go under a runs subfolder
+        repo_path = f"{date_folder}/runs/{run_id}/benchmark_results"
+    else:
+        # Scheduled runs go directly under the date
+        repo_path = f"{date_folder}/{run_id}/benchmark_results"
+
+    logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")
+
+    try:
+        # Get the authentication token (prioritize specific token, fallback to HF_TOKEN)
+        token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN")
+
+        # Upload all files in the output directory
+        from pathlib import Path
+
+        output_path = Path(output_dir)
+
+        for file_path in output_path.rglob("*"):
+            if file_path.is_file():
+                # Calculate relative path from output_dir
+                relative_path = file_path.relative_to(output_path)
+                path_in_repo = f"{repo_path}/{relative_path}"
+
+                logger.debug(f"Uploading {file_path} to {path_in_repo}")
+
+                api.upload_file(
+                    path_or_fileobj=str(file_path),
+                    path_in_repo=path_in_repo,
+                    repo_id=dataset_name,
+                    repo_type="dataset",
+                    token=token,
+                    commit_message=f"Upload benchmark results for run {run_id}",
+                )
+
+        logger.info(
+            f"Successfully uploaded results to: https://huggingface.co/datasets/{dataset_name}/tree/main/{repo_path}"
+        )
+
+        return run_id
+
+    except Exception as upload_error:
+        logger.error(f"Failed to upload results: {upload_error}")
+        import traceback
+
+        logger.debug(traceback.format_exc())
+        return None
+
+
 def main():
     """Main entry point for the benchmarking script."""
-    parser = argparse.ArgumentParser(description="Run all benchmarks in the ./benches directory")
+    # Generate a unique UUID for this benchmark run
+    benchmark_run_uuid = str(uuid.uuid4())[:8]
+
+    parser = argparse.ArgumentParser(
+        description="Run all benchmarks in the ./benches directory",
+        epilog="""
+Examples:
+  # Run all available benchmarks
+  python3 run_benchmarks.py
+
+  # Run with a specific model and upload to a HuggingFace Dataset
+  python3 run_benchmarks.py --model-id meta-llama/Llama-2-7b-hf --upload-to-hub username/benchmark-results
+
+  # Run with a custom run ID and upload to a HuggingFace Dataset
+  python3 run_benchmarks.py --run-id experiment_v1 --upload-to-hub org/benchmarks
+
+  # Run only specific benchmarks with file logging
+  python3 run_benchmarks.py --include llama --enable-file-logging
+        """,  # noqa: W293
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
 
     parser.add_argument(
         "--output-dir",
```
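For illustration, a hypothetical standalone call to the `upload_results_to_hf_dataset` helper added above; it assumes execution inside the same module that defines the helper, and the local paths and dataset name are placeholders:

```python
# Hypothetical usage of the helper from the hunk above; paths and dataset name are illustrative.
import logging

logging.basicConfig(level=logging.INFO)

run_id = upload_results_to_hf_dataset(
    output_dir="benchmark_results",
    summary_file="benchmark_results/benchmark_summary_20250115_143022.json",
    dataset_name="username/benchmark-results",
    run_id="experiment_v1",
    logger=logging.getLogger("benchmarks"),
)
print("Uploaded under run ID:", run_id)
```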
```diff
@@ -228,20 +341,29 @@ def main():
 
     parser.add_argument("--exclude", type=str, nargs="*", help="Exclude benchmarks matching these names")
 
-    parser.add_argument("--enable-mock", action="store_true", help="Enable mock benchmark (skipped by default)")
-
     parser.add_argument("--enable-file-logging", action="store_true", help="Enable file logging (disabled by default)")
 
     parser.add_argument(
         "--commit-id", type=str, help="Git commit ID for metadata (if not provided, will auto-detect from git)"
     )
+
+    parser.add_argument(
+        "--upload-to-hub",
+        type=str,
+        help="Upload results to HuggingFace Dataset (provide dataset name, e.g., 'username/benchmark-results')",
+    )
+
+    parser.add_argument(
+        "--run-id", type=str, help="Custom run ID for organizing results (if not provided, will generate a unique ID)"
+    )
 
     args = parser.parse_args()
 
     # Setup logging
     logger = setup_logging(args.log_level, args.enable_file_logging)
 
     logger.info("Starting benchmark discovery and execution")
+    logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
     logger.info(f"Output directory: {args.output_dir}")
     logger.info(f"Benches directory: {args.benches_dir}")
 
```
```diff
@@ -286,9 +408,6 @@ def main():
     if args.model_id:
         benchmark_kwargs["model_id"] = args.model_id
 
-    # Add enable_mock flag for mock benchmark
-    benchmark_kwargs["enable_mock"] = args.enable_mock
-
     # Add commit_id if provided
     if args.commit_id:
         benchmark_kwargs["commit_id"] = args.commit_id
```
```diff
@@ -306,7 +425,27 @@ def main():
             successful_count += 1
 
     # Generate summary report
-    summary_file = generate_summary_report(args.output_dir, benchmark_results, logger)
+    summary_file = generate_summary_report(args.output_dir, benchmark_results, logger, benchmark_run_uuid)
+
+    # Upload results to HuggingFace Dataset if requested
+    upload_run_id = None
+    if args.upload_to_hub:
+        logger.info("=" * 60)
+        logger.info("UPLOADING TO HUGGINGFACE DATASET")
+        logger.info("=" * 60)
+        # Use provided run_id or fallback to benchmark run UUID
+        effective_run_id = args.run_id or benchmark_run_uuid
+        upload_run_id = upload_results_to_hf_dataset(
+            output_dir=args.output_dir,
+            summary_file=summary_file,
+            dataset_name=args.upload_to_hub,
+            run_id=effective_run_id,
+            logger=logger,
+        )
+        if upload_run_id:
+            logger.info(f"Upload completed with run ID: {upload_run_id}")
+        else:
+            logger.warning("Upload failed - continuing with local results")
 
     # Final summary
     total_benchmarks = len(filtered_benchmarks)
```
```diff
@@ -321,6 +460,16 @@ def main():
     logger.info(f"Output directory: {args.output_dir}")
     logger.info(f"Summary report: {summary_file}")
 
+    if args.upload_to_hub:
+        if upload_run_id:
+            logger.info(f"HuggingFace Dataset: {args.upload_to_hub}")
+            logger.info(f"Run ID: {upload_run_id}")
+            logger.info(
+                f"View results: https://huggingface.co/datasets/{args.upload_to_hub}/tree/main/{datetime.now().strftime('%Y-%m-%d')}/runs/{upload_run_id}"
+            )
+        else:
+            logger.warning("Upload to HuggingFace Dataset failed")
+
     if failed_count > 0:
         logger.warning(f"{failed_count} benchmark(s) failed. Check logs for details.")
         return 1
```