Add write token for uploading benchmark results to the Hub (#41047)

* Separate write token for Hub upload

* Address review comments

* Address review comments
This commit is contained in:
Ákos Hadnagy
2025-09-22 16:13:46 +02:00
committed by GitHub
parent 646ff51d1a
commit b9d337b6f3
3 changed files with 21 additions and 6 deletions

View File

@ -70,7 +70,8 @@ jobs:
python3 run_benchmarks.py \
--commit-id '${{ inputs.commit_sha || github.sha }}' \
--run-id '${{ inputs.run_id }}' \
--upload-to-hub '${{ inputs.benchmark_repo_id}}' \
--push-to-hub '${{ inputs.benchmark_repo_id}}' \
--token '${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}' \
--log-level INFO
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

View File

@ -27,10 +27,13 @@ You can automatically upload benchmark results to a HuggingFace Dataset for trac
```bash
# Upload to a public dataset with auto-generated run ID
python run_benchmarks.py --upload-to-hf username/benchmark-results
python run_benchmarks.py --upload-to-hub username/benchmark-results
# Upload with a custom run ID for easy identification
python run_benchmarks.py --upload-to-hf username/benchmark-results --run-id experiment_v1
python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1
# Upload with a custom HuggingFace token (if not set in the environment)
python run_benchmarks.py --upload-to-hub username/benchmark-results --token hf_your_token_here
```
**Dataset Directory Structure:**
@ -51,6 +54,13 @@ dataset_name/
└── ...
```
**Authentication for Uploads:**
For uploading results, you need a HuggingFace token with write permissions to the target dataset. You can provide the token in either of two ways (in order of precedence):
1. Command line: `--token hf_your_token_here`
2. Environment variable: `HF_TOKEN`
### Running Specific Benchmarks
```bash

View File

@ -195,6 +195,7 @@ def upload_results_to_hf_dataset(
summary_file: str,
dataset_name: str,
run_id: Optional[str] = None,
token: Optional[str] = None,
logger: Optional[logging.Logger] = None,
) -> Optional[str]:
"""
@ -205,6 +206,7 @@ def upload_results_to_hf_dataset(
summary_file: Path to the summary file
dataset_name: Name of the HuggingFace dataset to upload to
run_id: Unique run identifier (if None, will generate one)
token: HuggingFace token for authentication (if None, will use environment variables)
logger: Logger instance
Returns:
The run_id used for the upload, None if upload failed
@ -237,9 +239,6 @@ def upload_results_to_hf_dataset(
logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")
try:
# Get the authentication token (prioritize specific token, fallback to HF_TOKEN)
token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN")
# Upload all files in the output directory
from pathlib import Path
@ -357,6 +356,10 @@ Examples:
"--run-id", type=str, help="Custom run ID for organizing results (if not provided, will generate a unique ID)"
)
parser.add_argument(
"--token", type=str, help="HuggingFace token for dataset uploads (if not provided, will use HF_TOKEN environment variable)"
)
args = parser.parse_args()
# Setup logging
@ -440,6 +443,7 @@ Examples:
summary_file=summary_file,
dataset_name=args.upload_to_hub,
run_id=effective_run_id,
token=args.token,
logger=logger,
)
if upload_run_id: