From b9d337b6f381c99d6e5257d23d9f902e2bf87822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81kos=20Hadnagy?= Date: Mon, 22 Sep 2025 16:13:46 +0200 Subject: [PATCH] Add write token for uploading benchmark results to the Hub (#41047) * Separate write token for Hub upload * Address review comments * Address review comments --- .github/workflows/benchmark_v2.yml | 3 ++- benchmark_v2/README.md | 14 ++++++++++++-- benchmark_v2/run_benchmarks.py | 10 +++++++--- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index a2c25908d12..dc078e67ea9 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -70,7 +70,8 @@ jobs: python3 run_benchmarks.py \ --commit-id '${{ inputs.commit_sha || github.sha }}' \ --run-id '${{ inputs.run_id }}' \ - --upload-to-hub '${{ inputs.benchmark_repo_id}}' \ + --push-to-hub '${{ inputs.benchmark_repo_id}}' \ + --token '${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}' \ --log-level INFO env: HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} \ No newline at end of file diff --git a/benchmark_v2/README.md b/benchmark_v2/README.md index 1d34de6408c..bcbb9cc71ef 100644 --- a/benchmark_v2/README.md +++ b/benchmark_v2/README.md @@ -27,10 +27,13 @@ You can automatically upload benchmark results to a HuggingFace Dataset for trac ```bash # Upload to a public dataset with auto-generated run ID -python run_benchmarks.py --upload-to-hf username/benchmark-results +python run_benchmarks.py --upload-to-hub username/benchmark-results # Upload with a custom run ID for easy identification -python run_benchmarks.py --upload-to-hf username/benchmark-results --run-id experiment_v1 +python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1 + +# Upload with custom HuggingFace token (if not set in environment) +python run_benchmarks.py --upload-to-hub username/benchmark-results --token hf_your_token_here ``` **Dataset 
Directory Structure:** @@ -51,6 +54,13 @@ dataset_name/ └── ... ``` +**Authentication for Uploads:** + +For uploading results, you need a HuggingFace token with write permissions to the target dataset. You can provide the token in several ways (in order of precedence): + +1. Command line: `--token hf_your_token_here` +2. Environment variable: `HF_TOKEN` + ### Running Specific Benchmarks ```bash diff --git a/benchmark_v2/run_benchmarks.py b/benchmark_v2/run_benchmarks.py index 44f6515a2c3..18e8a9a7765 100755 --- a/benchmark_v2/run_benchmarks.py +++ b/benchmark_v2/run_benchmarks.py @@ -195,6 +195,7 @@ def upload_results_to_hf_dataset( summary_file: str, dataset_name: str, run_id: Optional[str] = None, + token: Optional[str] = None, logger: Optional[logging.Logger] = None, ) -> Optional[str]: """ @@ -205,6 +206,7 @@ def upload_results_to_hf_dataset( summary_file: Path to the summary file dataset_name: Name of the HuggingFace dataset to upload to run_id: Unique run identifier (if None, will generate one) + token: HuggingFace token for authentication (if None, will use environment variables) logger: Logger instance Returns: The run_id used for the upload, None if upload failed @@ -237,9 +239,6 @@ def upload_results_to_hf_dataset( logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'") try: - # Get the authentication token (prioritize specific token, fallback to HF_TOKEN) - token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN") - # Upload all files in the output directory from pathlib import Path @@ -357,6 +356,10 @@ Examples: "--run-id", type=str, help="Custom run ID for organizing results (if not provided, will generate a unique ID)" ) + parser.add_argument( + "--token", type=str, help="HuggingFace token for dataset uploads (if not provided, will use HF_TOKEN environment variable)" + ) + args = parser.parse_args() # Setup logging @@ -440,6 +443,7 @@ Examples: summary_file=summary_file, 
dataset_name=args.upload_to_hub, run_id=effective_run_id, + token=args.token, logger=logger, ) if upload_run_id: