From b9d337b6f381c99d6e5257d23d9f902e2bf87822 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81kos=20Hadnagy?= <akos@ahadnagy.com>
Date: Mon, 22 Sep 2025 16:13:46 +0200
Subject: [PATCH] Add write token for uploading benchmark results to the Hub
 (#41047)

* Separate write token for Hub upload

* Address review comments

* Address review comments
---
 .github/workflows/benchmark_v2.yml |  3 ++-
 benchmark_v2/README.md             | 14 ++++++++++++--
 benchmark_v2/run_benchmarks.py     | 10 +++++++---
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml
index a2c25908d12..dc078e67ea9 100644
--- a/.github/workflows/benchmark_v2.yml
+++ b/.github/workflows/benchmark_v2.yml
@@ -70,7 +70,8 @@ jobs:
           python3 run_benchmarks.py \
           --commit-id '${{ inputs.commit_sha || github.sha }}' \
           --run-id '${{ inputs.run_id }}' \
-          --upload-to-hub '${{ inputs.benchmark_repo_id}}' \
+          --push-to-hub '${{ inputs.benchmark_repo_id}}' \
+          --token '${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}' \
           --log-level INFO
         env:
           HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
\ No newline at end of file
diff --git a/benchmark_v2/README.md b/benchmark_v2/README.md
index 1d34de6408c..bcbb9cc71ef 100644
--- a/benchmark_v2/README.md
+++ b/benchmark_v2/README.md
@@ -27,10 +27,13 @@ You can automatically upload benchmark results to a HuggingFace Dataset for trac
 
 ```bash
 # Upload to a public dataset with auto-generated run ID
-python run_benchmarks.py --upload-to-hf username/benchmark-results
+python run_benchmarks.py --upload-to-hub username/benchmark-results
 
 # Upload with a custom run ID for easy identification
-python run_benchmarks.py --upload-to-hf username/benchmark-results --run-id experiment_v1
+python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1
+
+# Upload with a custom HuggingFace token (if not set in the environment)
+python run_benchmarks.py --upload-to-hub username/benchmark-results --token hf_your_token_here
 ```
 
 **Dataset Directory Structure:**
@@ -51,6 +54,13 @@ dataset_name/
     └── ...
 ```
 
+**Authentication for Uploads:**
+
+For uploading results, you need a HuggingFace token with write permissions to the target dataset. You can provide the token in one of two ways (in order of precedence):
+
+1. Command line: `--token hf_your_token_here`
+2. Environment variable: `HF_TOKEN`
+
 ### Running Specific Benchmarks
 
 ```bash
diff --git a/benchmark_v2/run_benchmarks.py b/benchmark_v2/run_benchmarks.py
index 44f6515a2c3..18e8a9a7765 100755
--- a/benchmark_v2/run_benchmarks.py
+++ b/benchmark_v2/run_benchmarks.py
@@ -195,6 +195,7 @@ def upload_results_to_hf_dataset(
     summary_file: str,
     dataset_name: str,
     run_id: Optional[str] = None,
+    token: Optional[str] = None,
     logger: Optional[logging.Logger] = None,
 ) -> Optional[str]:
     """
@@ -205,6 +206,7 @@ def upload_results_to_hf_dataset(
         summary_file: Path to the summary file
         dataset_name: Name of the HuggingFace dataset to upload to
         run_id: Unique run identifier (if None, will generate one)
+        token: HuggingFace token for authentication (if None, will use environment variables)
         logger: Logger instance
     Returns:
         The run_id used for the upload, None if upload failed
@@ -237,9 +239,6 @@ def upload_results_to_hf_dataset(
     logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")
 
     try:
-        # Get the authentication token (prioritize specific token, fallback to HF_TOKEN)
-        token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN")
-
         # Upload all files in the output directory
         from pathlib import Path
 
@@ -357,6 +356,10 @@ Examples:
         "--run-id", type=str, help="Custom run ID for organizing results (if not provided, will generate a unique ID)"
     )
 
+    parser.add_argument(
+        "--token", type=str, help="HuggingFace token for dataset uploads (if not provided, will use HF_TOKEN environment variable)"
+    )
+
     args = parser.parse_args()
 
     # Setup logging
@@ -440,6 +443,7 @@ Examples:
                 summary_file=summary_file,
                 dataset_name=args.upload_to_hub,
                 run_id=effective_run_id,
+                token=args.token,
                 logger=logger,
             )
             if upload_run_id: