Compare commits

...

5 Commits

SHA1        Message             Date
ba92002928  CI with torch 2.7   2025-04-03 21:51:03 +02:00
ee25237a2f  try                 2025-04-03 21:37:38 +02:00
246db22767  try                 2025-04-03 21:12:23 +02:00
b6328584bf  try                 2025-04-03 20:50:37 +02:00
98adb0d92e  try                 2025-04-03 20:24:28 +02:00
8 changed files with 102 additions and 91 deletions

View File

@@ -71,13 +71,13 @@ jobs:
       - name: Check failed tests
         working-directory: /transformers
-        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
+        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures_temp.json --output_file new_model_failures_with_bad_commit_temp.json

       - name: Show results
         working-directory: /transformers
         run: |
-          ls -l new_model_failures_with_bad_commit.json
-          cat new_model_failures_with_bad_commit.json
+          ls -l new_model_failures_with_bad_commit_temp.json
+          cat new_model_failures_with_bad_commit_temp.json

       - name: Checkout back
         working-directory: /transformers

View File

@@ -93,6 +93,10 @@ jobs:
         run: |
           python3 utils/print_env.py

+      - name: Installed torch 2.7 RC
+        working-directory: /transformers
+        run: python3 -m pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
+
       - name: Show installed libraries and their versions
         working-directory: /transformers
         run: pip freeze
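This added step installs the torch 2.7.0 release candidate from the cu126 test wheel index on top of the image's preinstalled torch. A quick sanity check along these lines (a sketch, not part of the diff; run inside the same container) would confirm the RC build actually took effect:

import torch

# Expect a 2.7.0 release-candidate build compiled against CUDA 12.6,
# matching the cu126 test index used in the workflow step above.
print(torch.__version__)
print(torch.version.cuda)
assert torch.__version__.startswith("2.7.0"), "torch 2.7 RC not installed"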

View File

@@ -2,12 +2,12 @@ name: Self-hosted runner (scheduled)

 on:
-  repository_dispatch:
-  schedule:
-    - cron: "17 2 * * *"
+  # repository_dispatch:
+  # schedule:
+  #   - cron: "17 2 * * *"
   push:
     branches:
-      - run_scheduled_ci*
+      - ci_with_torch_2.7

 jobs:
   model-ci:
@@ -20,59 +20,59 @@ jobs:
       docker: huggingface/transformers-all-latest-gpu
       ci_event: Daily CI
     secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-pipeline-torch"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-gpu
-      ci_event: Daily CI
-    secrets: inherit
+  #
+  # torch-pipeline:
+  #   name: Torch pipeline CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_pipelines_torch_gpu
+  #     slack_report_channel: "#transformers-ci-daily-pipeline-torch"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-pytorch-gpu
+  #     ci_event: Daily CI
+  #   secrets: inherit
-
-  tf-pipeline:
-    name: TF pipeline CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_pipelines_tf_gpu
-      slack_report_channel: "#transformers-ci-daily-pipeline-tf"
-      runner: daily-ci
-      docker: huggingface/transformers-tensorflow-gpu
-      ci_event: Daily CI
-    secrets: inherit
+  #
+  # tf-pipeline:
+  #   name: TF pipeline CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_pipelines_tf_gpu
+  #     slack_report_channel: "#transformers-ci-daily-pipeline-tf"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-tensorflow-gpu
+  #     ci_event: Daily CI
+  #   secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-examples"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
-    secrets: inherit
+  #
+  # example-ci:
+  #   name: Example CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_examples_gpu
+  #     slack_report_channel: "#transformers-ci-daily-examples"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-all-latest-gpu
+  #     ci_event: Daily CI
+  #   secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-deepspeed"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
-      ci_event: Daily CI
-      working-directory-prefix: /workspace
-    secrets: inherit
+  #
+  # deepspeed-ci:
+  #   name: DeepSpeed CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_torch_cuda_extensions_gpu
+  #     slack_report_channel: "#transformers-ci-daily-deepspeed"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
+  #     ci_event: Daily CI
+  #     working-directory-prefix: /workspace
+  #   secrets: inherit
-
-  quantization-ci:
-    name: Quantization CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_quantization_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-quantization"
-      runner: daily-ci
-      docker: huggingface/transformers-quantization-latest-gpu
-      ci_event: Daily CI
-    secrets: inherit
+  #
+  # quantization-ci:
+  #   name: Quantization CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_quantization_torch_gpu
+  #     slack_report_channel: "#transformers-ci-daily-quantization"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-quantization-latest-gpu
+  #     ci_event: Daily CI
+  #   secrets: inherit

View File

@@ -49,6 +49,9 @@ if len(result.stderr) > 0:
     if "ERROR: file or directory not found: " in result.stderr:
         print("test not found in this commit")
         exit(0)
+    elif "ERROR: not found: " in result.stderr:
+        print("test not found in this commit")
+        exit(0)
     else:
        print(f"pytest failed to run: {{result.stderr}}")
        exit(-1)
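Both error strings map to the same outcome: "ERROR: file or directory not found: " is what pytest prints when the test file itself is absent at the checked-out commit, while "ERROR: not found: " is the variant printed when the file exists but the requested test node id does not. Either way the target test does not exist at that commit, so the bisection should skip it rather than abort. (The doubled braces in {{result.stderr}} suggest this block lives inside a larger format-string template, where {{ renders as a literal {.) A standalone sketch of the same branching, using a hypothetical test node id:

import subprocess

# Hypothetical node id; the real script substitutes the failing test it is bisecting.
result = subprocess.run(
    ["python3", "-m", "pytest", "tests/models/vit/test_modeling_vit.py::NoSuchTest"],
    capture_output=True,
    text=True,
)
if len(result.stderr) > 0:
    if "ERROR: file or directory not found: " in result.stderr:
        print("test not found in this commit")  # test file missing at this commit
    elif "ERROR: not found: " in result.stderr:
        print("test not found in this commit")  # file exists, node id does not
    else:
        print(f"pytest failed to run: {result.stderr}")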

View File

@@ -31,6 +31,7 @@ def get_daily_ci_runs(token, num_runs=7):

 def get_last_daily_ci_runs(token):
     """Get the last completed workflow run id of the scheduled (daily) CI."""
+    return "14233781160"
     workflow_runs = get_daily_ci_runs(token)
     workflow_run_id = None
     for workflow_run in workflow_runs:

View File

@@ -523,20 +523,20 @@ class Message:
         extra_blocks = self.get_new_model_failure_blocks(to_truncate=False)
         if extra_blocks:
             failure_text = extra_blocks[-1]["text"]["text"]
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.txt")
             with open(file_path, "w", encoding="UTF-8") as fp:
                 fp.write(failure_text)

             # upload results to Hub dataset
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.txt")
             commit_info = api.upload_file(
                 path_or_fileobj=file_path,
-                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt",
+                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.txt",
                 repo_id="hf-internal-testing/transformers_daily_ci",
                 repo_type="dataset",
                 token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
             )
-            url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt"
+            url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.txt"

             # extra processing to save to json format
             new_failed_tests = {}
@@ -550,15 +550,15 @@ class Message:
                     new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
                 for url, device in items:
                     new_failed_tests[model][f"{device}-gpu"].append(line)
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.json")
             with open(file_path, "w", encoding="UTF-8") as fp:
                 json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)

             # upload results to Hub dataset
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.json")
             _ = api.upload_file(
                 path_or_fileobj=file_path,
-                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json",
+                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.json",
                 repo_id="hf-internal-testing/transformers_daily_ci",
                 repo_type="dataset",
                 token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
@@ -1220,6 +1220,7 @@ if __name__ == "__main__":
     target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main"
     is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow
+    is_scheduled_ci_run = True

     # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as
     # results.
@@ -1228,14 +1229,14 @@ if __name__ == "__main__":
            json.dump(model_results, fp, indent=4, ensure_ascii=False)

        # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-        if is_scheduled_ci_run:
-            api.upload_file(
-                path_or_fileobj=f"ci_results_{job_name}/model_results.json",
-                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json",
-                repo_id="hf-internal-testing/transformers_daily_ci",
-                repo_type="dataset",
-                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
-            )
+        # if is_scheduled_ci_run:
+        #     api.upload_file(
+        #         path_or_fileobj=f"ci_results_{job_name}/model_results.json",
+        #         path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json",
+        #         repo_id="hf-internal-testing/transformers_daily_ci",
+        #         repo_type="dataset",
+        #         token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+        #     )

        # Must have the same keys as in `additional_results`.
        # The values are used as the file names where to save the corresponding CI job results.
@@ -1250,14 +1251,14 @@ if __name__ == "__main__":
                json.dump(job_result, fp, indent=4, ensure_ascii=False)

            # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-            if is_scheduled_ci_run:
-                api.upload_file(
-                    path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
-                    path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
-                    repo_id="hf-internal-testing/transformers_daily_ci",
-                    repo_type="dataset",
-                    token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
-                )
+            # if is_scheduled_ci_run:
+            #     api.upload_file(
+            #         path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
+            #         path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
+            #         repo_id="hf-internal-testing/transformers_daily_ci",
+            #         repo_type="dataset",
+            #         token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+            #     )

    prev_ci_artifacts = None
    if is_scheduled_ci_run:

View File

@@ -24,7 +24,7 @@ from huggingface_hub import HfApi

 if __name__ == "__main__":
     api = HfApi()

-    with open("new_model_failures_with_bad_commit.json") as fp:
+    with open("new_model_failures_with_bad_commit_temp.json") as fp:
         data = json.load(fp)

     # TODO: extend
@@ -68,16 +68,16 @@ if __name__ == "__main__":
        new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}

    # Upload to Hub and get the url
-    with open("new_model_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
+    with open("new_model_failures_with_bad_commit_grouped_by_authors_temp.json", "w") as fp:
        json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
    commit_info = api.upload_file(
-        path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors.json",
-        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
+        path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors_temp.json",
+        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors_temp.json",
        repo_id="hf-internal-testing/transformers_daily_ci",
        repo_type="dataset",
        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
    )
-    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"
+    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors_temp.json"

    # Add `GH_` prefix as keyword mention
    output = {}

View File

@@ -62,4 +62,6 @@ if __name__ == "__main__":
         start = end
         end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0)
         model_splits.append(d[start:end])
+
+    # model_splits = [["models/vit"]]
     print(model_splits)
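The loop above hands out num_jobs // num_splits jobs per split and lets the first num_jobs % num_splits splits absorb one extra job each, so split sizes differ by at most one. A minimal self-contained sketch with assumed inputs (the real script takes the job list from test collection and --num_splits from the CLI):

# Hypothetical inputs: 10 jobs over 3 splits -> chunk sizes 4, 3, 3.
d = [f"models/m{i}" for i in range(10)]
num_splits = 3
num_jobs = len(d)
num_jobs_per_splits = num_jobs // num_splits

model_splits, end = [], 0
for idx in range(num_splits):
    start = end
    # The first (num_jobs % num_splits) splits each take one extra job.
    end = start + num_jobs_per_splits + (1 if idx < num_jobs % num_splits else 0)
    model_splits.append(d[start:end])

print([len(s) for s in model_splits])  # [4, 3, 3]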