Mirror of https://github.com/huggingface/transformers.git
Add write token for uploading benchmark results to the Hub (#41047)
* Separate write token for Hub upload
* Address review comments
* Address review comments
Changed files: .github/workflows/benchmark_v2.yml (vendored, 3 changes), plus the benchmarks README and run_benchmarks.py in the hunks below.
.github/workflows/benchmark_v2.yml:

@@ -70,7 +70,8 @@ jobs:
           python3 run_benchmarks.py \
             --commit-id '${{ inputs.commit_sha || github.sha }}' \
             --run-id '${{ inputs.run_id }}' \
-            --upload-to-hub '${{ inputs.benchmark_repo_id}}' \
+            --push-to-hub '${{ inputs.benchmark_repo_id}}' \
+            --token '${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}' \
             --log-level INFO
         env:
           HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
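The read token stays in the `HF_TOKEN` environment variable for downloads, while uploads now authenticate with the dedicated write token passed via `--token`. A minimal, hypothetical pre-flight check of such a write token, not part of this PR (the helper name is an assumption; `HfApi.whoami` is standard huggingface_hub):

```python
# Hypothetical helper, not part of this PR: verify a write token before the job uploads anything.
from huggingface_hub import HfApi


def check_upload_token(token: str) -> str:
    # whoami() raises if the Hub rejects the token; otherwise it returns account metadata.
    identity = HfApi(token=token).whoami()
    return identity["name"]
```

Failing fast like this would surface a misconfigured upload-token secret before any benchmarks run.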
Benchmarks README:

@@ -27,10 +27,13 @@ You can automatically upload benchmark results to a HuggingFace Dataset for trac

 ```bash
 # Upload to a public dataset with auto-generated run ID
-python run_benchmarks.py --upload-to-hf username/benchmark-results
+python run_benchmarks.py --upload-to-hub username/benchmark-results

 # Upload with a custom run ID for easy identification
-python run_benchmarks.py --upload-to-hf username/benchmark-results --run-id experiment_v1
+python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1
+
+# Upload with custom HuggingFace token (if not set in environment)
+python run_benchmarks.py --upload-to-hub username/benchmark-results --token hf_your_token_here
 ```

 **Dataset Directory Structure:**
@@ -51,6 +54,13 @@ dataset_name/
 └── ...
 ```

+**Authentication for Uploads:**
+
+For uploading results, you need a HuggingFace token with write permissions to the target dataset. You can provide the token in several ways (in order of precedence):
+
+1. Command line: `--token hf_your_token_here`
+2. Environment variable: `HF_TOKEN`
+
 ### Running Specific Benchmarks

 ```bash
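The precedence documented above (an explicit `--token` wins, `HF_TOKEN` is the fallback) amounts to a one-line resolution step. A minimal sketch, not the script's actual implementation:

```python
import os
from typing import Optional


def resolve_upload_token(cli_token: Optional[str]) -> Optional[str]:
    # Prefer the token given on the command line; otherwise fall back to the HF_TOKEN environment variable.
    return cli_token or os.getenv("HF_TOKEN")
```

When neither is set, huggingface_hub generally falls back to any token cached by `huggingface-cli login`.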
run_benchmarks.py:

@@ -195,6 +195,7 @@ def upload_results_to_hf_dataset(
     summary_file: str,
     dataset_name: str,
     run_id: Optional[str] = None,
+    token: Optional[str] = None,
     logger: Optional[logging.Logger] = None,
 ) -> Optional[str]:
     """
@@ -205,6 +206,7 @@ def upload_results_to_hf_dataset(
         summary_file: Path to the summary file
         dataset_name: Name of the HuggingFace dataset to upload to
         run_id: Unique run identifier (if None, will generate one)
+        token: HuggingFace token for authentication (if None, will use environment variables)
         logger: Logger instance
     Returns:
         The run_id used for the upload, None if upload failed
@@ -237,9 +239,6 @@
     logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")

     try:
-        # Get the authentication token (prioritize specific token, fallback to HF_TOKEN)
-        token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN")
-
         # Upload all files in the output directory
         from pathlib import Path

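With the in-function environment lookup removed, the token arrives as a function argument and can be handed directly to huggingface_hub when the result files are pushed. A rough sketch of uploading an output directory to the dataset under a run-specific path, assuming `upload_folder` (the script's actual upload loop is not shown in this hunk; the function and parameter names below are illustrative):

```python
from huggingface_hub import HfApi


def upload_run_folder(output_dir: str, dataset_name: str, repo_path: str, token: str) -> None:
    # Push everything under output_dir to <dataset_name>/<repo_path> on the Hub.
    api = HfApi(token=token)
    api.upload_folder(
        folder_path=output_dir,
        path_in_repo=repo_path,
        repo_id=dataset_name,
        repo_type="dataset",
    )
```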
@@ -357,6 +356,10 @@ Examples:
         "--run-id", type=str, help="Custom run ID for organizing results (if not provided, will generate a unique ID)"
     )

+    parser.add_argument(
+        "--token", type=str, help="HuggingFace token for dataset uploads (if not provided, will use HF_TOKEN environment variable)"
+    )
+
     args = parser.parse_args()

     # Setup logging
@@ -440,6 +443,7 @@ Examples:
             summary_file=summary_file,
             dataset_name=args.upload_to_hub,
             run_id=effective_run_id,
+            token=args.token,
             logger=logger,
         )
         if upload_run_id: