Compare commits

...

15 Commits

SHA1 Message Date
ca38d640b7 fix 2025-10-17 22:21:10 +02:00
b8011a3dc5 fix 2025-10-17 21:30:29 +02:00
a118c8e1c4 fix 2025-10-17 20:59:00 +02:00
0109c42409 fix 2025-10-17 20:30:23 +02:00
3c7552f733 fix 2025-10-17 15:40:54 +02:00
4757bf062b fix 2025-10-17 15:12:12 +02:00
aceaa7ce97 fix 2025-10-17 15:05:09 +02:00
c9293376a0 fix 2025-10-17 12:04:50 +02:00
e69d3ca150 check 1 2025-10-17 10:44:24 +02:00
bffad7f4fb check 1 2025-10-17 09:21:09 +02:00
740f952218 check 1 2025-10-17 06:57:10 +02:00
950c4e5303 check 1 2025-10-17 06:28:55 +02:00
89970f4797 check 1 2025-10-17 03:03:25 +02:00
a4a46e62a5 check 1 2025-10-16 21:32:04 +02:00
9b36498d5f 1 2025-10-16 21:16:53 +02:00
6 changed files with 161 additions and 108 deletions

View File

@@ -41,9 +41,14 @@ env:
jobs:
check_new_failures:
name: " "
name: "Find commits for new failing tests"
strategy:
matrix:
run_idx: [1]
runs-on:
group: aws-g5-4xlarge-cache
outputs:
process: ${{ steps.check_file.outputs.process }}
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -54,14 +59,17 @@ jobs:
path: /transformers/ci_results_${{ inputs.job }}
- name: Check file
id: check_file
working-directory: /transformers
run: |
if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
echo "`ci_results_${{ inputs.job }}/new_failures.json` exists, continue ..."
echo "process=true" >> $GITHUB_ENV
echo "process=true" >> $GITHUB_OUTPUT
else
echo "`ci_results_${{ inputs.job }}/new_failures.json` doesn't exist, abort."
echo "process=false" >> $GITHUB_ENV
echo "process=false" >> $GITHUB_OUTPUT
fi
- uses: actions/download-artifact@v4
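
Note on the dual write in the `Check file` step above: `$GITHUB_ENV` makes `env.process` visible to later steps within the same job, while `$GITHUB_OUTPUT` feeds the step output that the job-level `outputs.process` mapping exposes, which is what lets the new `process_new_failures_with_commit_info` job below gate itself on `needs.check_new_failures.outputs.process`.
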
@@ -118,6 +126,10 @@ jobs:
run: |
python3 utils/print_env.py
- name: Install pytest-flakefinder
if: ${{ env.process == 'true' }}
run: python3 -m pip install pytest-flakefinder
- name: Show installed libraries and their versions
working-directory: /transformers
if: ${{ env.process == 'true' }}
@@ -126,25 +138,63 @@ jobs:
- name: Check failed tests
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit.json
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
- name: Show results
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
ls -l new_failures_with_bad_commit.json
cat new_failures_with_bad_commit.json
ls -l new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
cat new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
- name: Checkout back
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}
path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
process_new_failures_with_commit_info:
name: "process bad commit reports"
needs: check_new_failures
if: needs.check_new_failures.outputs.process == 'true'
runs-on:
group: aws-g5-4xlarge-cache
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- uses: actions/download-artifact@v4
with:
name: ci_results_${{ inputs.job }}
path: /transformers/ci_results_${{ inputs.job }}
- uses: actions/download-artifact@v4
with:
pattern: new_failures_with_bad_commit_${{ inputs.job }}*
path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}
merge-multiple: true
- name: Check files
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
git checkout ${{ inputs.start_sha }}
ls -la /transformers
ls -la /transformers/new_failures_with_bad_commit_${{ inputs.job }}
# Currently, we only run with a single runner by using `run_idx: [1]`. We might try multiple runners
# to further reduce false positives caused by flaky tests, which would require extra processing to merge the reports (see the sketch below).
- name: Merge files
shell: bash
working-directory: /transformers
run: |
cp /transformers/new_failures_with_bad_commit_${{ inputs.job }}/new_failures_with_bad_commit_${{ inputs.job }}_1.json new_failures_with_bad_commit.json
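
Should multiple runners ever be enabled, the single `cp` above would have to become a real merge. A minimal sketch of one way to do it, assuming (hypothetically) that each per-runner report is a flat JSON object keyed by test name and that a failure is only worth reporting when every runner reproduces it; the actual report structure may be nested differently, and the directory name below uses a placeholder job name:

import json
from pathlib import Path

# Hypothetical merge: keep a test only if every runner's report contains it,
# so failures that reproduced on a single runner are dropped as likely flaky.
report_dir = Path("new_failures_with_bad_commit_run_models_gpu")  # placeholder job name
reports = [json.loads(p.read_text()) for p in sorted(report_dir.glob("*.json"))]
merged = {}
if reports:
    merged = {test: info for test, info in reports[0].items() if all(test in r for r in reports[1:])}
Path("new_failures_with_bad_commit.json").write_text(json.dumps(merged, indent=4))
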
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
- name: Process report
shell: bash
working-directory: /transformers
if: ${{ env.process == 'true' }}
env:
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@@ -156,7 +206,6 @@ jobs:
- name: Process report
shell: bash
working-directory: /transformers
if: ${{ env.process == 'true' }}
env:
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@@ -171,13 +220,12 @@ jobs:
- name: Prepare Slack report title
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
pip install slack_sdk
echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
- name: Send processed report
if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
with:
# Slack channel id, channel name, or user id to post message.

View File

@@ -6,7 +6,7 @@ on:
- cron: "17 2 * * *"
push:
branches:
- run_nvidia_ci*
- multi_jobs_to_check_bad_commit
workflow_dispatch:
inputs:
prev_workflow_run_id:
@@ -23,7 +23,7 @@ on:
# Used for `push` to easily modify the target workflow runs to compare against
env:
prev_workflow_run_id: ""
prev_workflow_run_id: "18548615847"
other_workflow_run_id: ""
@@ -49,72 +49,10 @@ jobs:
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-models"
slack_report_channel: "#transformers-ci-dummy"
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
runner_type: "a10"
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
docker: huggingface/transformers-pytorch-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
example-ci:
name: Example CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-examples"
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
trainer-fsdp-ci:
name: Trainer/FSDP CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_trainer_and_fsdp_gpu
slack_report_channel: "#transformers-ci-daily-training"
docker: huggingface/transformers-all-latest-gpu
runner_type: "a10"
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-training"
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
ci_event: Daily CI
working-directory-prefix: /workspace
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
quantization-ci:
name: Quantization CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_quantization_torch_gpu
slack_report_channel: "#transformers-ci-daily-quantization"
docker: huggingface/transformers-quantization-latest-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit

View File

@@ -219,6 +219,7 @@ class ViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
super().test_multi_gpu_data_parallel_forward()
def test_config(self):
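# Deliberately failing assertion, added on this test branch to give the bad-commit finder a known new failure.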
assert 1 == 2
self.config_tester.run_common_tests()
@unittest.skip(reason="ViT does not use inputs_embeds")

View File

@@ -20,6 +20,7 @@ import os
import re
import subprocess
import git
import requests
@@ -38,8 +39,14 @@ def create_script(target_test):
import os
import subprocess
_ = subprocess.run(
["python3", "-m", "pip", "install", "-e", "."],
capture_output = True,
text=True,
)
result = subprocess.run(
["python3", "-m", "pytest", "-v", "-rfEp", f"{target_test}"],
["python3", "-m", "pytest", "-v", "--flake-finder", "--flake-runs=4", "-rfEp", f"{target_test}"],
capture_output = True,
text=True,
)
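
For context: `pytest-flakefinder` (installed in the workflow above) duplicates each collected test, so `--flake-runs=4` runs the target test four times in one session, making an intermittent failure far more likely to surface as `FAILED` while bisecting.
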
@@ -47,17 +54,20 @@ print(result.stdout)
if f"FAILED {target_test}" in result.stdout:
print("test failed")
exit(2)
exit(1)
elif result.returncode != 0:
if "ERROR: file or directory not found: " in result.stderr:
print("test file or directory not found in this commit")
# git bisect treats exit code 125 as `test not found`. But that would keep it from reaching a conclusion
# when a test is added between the `good commit` (exclusive) and the `bad commit` (inclusive) (in git bisect terminology).
# So we return 0 here instead, so the process can still identify the first commit that fails the test.
exit(0)
elif "ERROR: not found: " in result.stderr:
print("test not found in this commit")
exit(0)
else:
print(f"pytest gets unknown error: {{result.stderr}}")
exit(-1)
exit(1)
print(f"pytest runs successfully.")
exit(0)
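
As background for the comment above: `git bisect run` interprets exit code 0 as "good", 125 as "cannot test, skip this commit", and any other code from 1 to 127 as "bad", which is why the generated script maps a missing test to 0 rather than 125, keeping the bisection able to converge on a first bad commit.
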
@@ -67,20 +77,63 @@ exit(0)
fp.write(script.strip())
def is_bad_commit(target_test, commit):
repo = git.Repo(".") # or specify path to your repo
# Save the current HEAD reference
original_head = repo.head.commit
# Checkout to the commit
repo.git.checkout(commit)
create_script(target_test=target_test)
result = subprocess.run(
["python3", "target_script.py"],
capture_output=True,
text=True,
)
# Restore to original commit
repo.git.checkout(original_head)
return result.returncode != 0
def find_bad_commit(target_test, start_commit, end_commit):
"""Find (backward) the earliest commit between `start_commit` and `end_commit` at which `target_test` fails.
"""Find (backward) the earliest commit between `start_commit` (inclusive) and `end_commit` (exclusive) at which `target_test` fails.
Args:
target_test (`str`): The test to check.
start_commit (`str`): The latest commit.
end_commit (`str`): The earliest commit.
start_commit (`str`): The latest commit (inclusive).
end_commit (`str`): The earliest commit (exclusive).
Returns:
`str`: The earliest commit at which `target_test` fails.
"""
# check if `end_commit` fails the test
failed_before = is_bad_commit(target_test, end_commit)
if failed_before:
return (
None,
f"flaky: test passed in the previous run (commit: {end_commit}) but failed (on the same commit) during the check of the current run.",
)
# if there is no new commit (e.g. 2 different CI runs on the same commit):
# - failed once on `start_commit` but passed on `end_commit`, which are the same commit --> flaky (or something changed externally) --> don't report
if start_commit == end_commit:
return start_commit
return (
None,
f"flaky: test fails on the current CI run but passed in the previous run which is running on the same commit {end_commit}.",
)
# Now, we are (almost) sure `target_test` is not failing at `end_commit`
# check if `start_commit` fails the test
failed_now = is_bad_commit(target_test, start_commit)
if not failed_now:
# failed on CI run, but not reproducible here --> don't report
return None, f"flaky: test fails on the current CI run (commit: {start_commit}) but passes during the check."
create_script(target_test=target_test)
@@ -105,7 +158,7 @@ git bisect run python3 target_script.py
if "error: bisect run failed" in result.stderr:
error_msg = f"Error when running git bisect:\nbash error: {result.stderr}\nbash output:\n{result.stdout}\nset `bad_commit` to `None`."
print(error_msg)
return None
return None, "git bisect failed"
pattern = r"(.+) is the first bad commit"
commits = re.findall(pattern, result.stdout)
@@ -117,7 +170,7 @@ git bisect run python3 target_script.py
print(f"Between `start_commit` {start_commit} and `end_commit` {end_commit}")
print(f"bad_commit: {bad_commit}\n")
return bad_commit
return bad_commit, "git bisect found the bad commit."
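
A hypothetical usage sketch of the new two-value return (the SHAs below are placeholders taken from this comparison's commit list):

# Hypothetical call; the SHAs are placeholders, and the test name is the one made to fail on this branch.
commit, status = find_bad_commit(
    target_test="tests/models/vit/test_modeling_vit.py::ViTModelTest::test_config",
    start_commit="ca38d640b7",
    end_commit="9b36498d5f",
)
if commit is None:
    print(f"nothing to blame: {status}")
else:
    print(f"first bad commit: {commit} ({status})")
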
def get_commit_info(commit):
@@ -171,9 +224,11 @@ if __name__ == "__main__":
raise ValueError("Exactly one argument `test` or `file` must be specified.")
if args.test is not None:
commit = find_bad_commit(target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit)
commit, status = find_bad_commit(
target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit
)
with open(args.output_file, "w", encoding="UTF-8") as fp:
fp.write(f"{args.test}\n{commit}")
fp.write(f"{args.test}\n{commit}\n{status}")
elif os.path.isfile(args.file):
with open(args.file, "r", encoding="UTF-8") as fp:
reports = json.load(fp)
@@ -185,8 +240,10 @@ if __name__ == "__main__":
failed_tests_with_bad_commits = []
for test in failed_tests:
commit = find_bad_commit(target_test=test, start_commit=args.start_commit, end_commit=args.end_commit)
info = {"test": test, "commit": commit}
commit, status = find_bad_commit(
target_test=test, start_commit=args.start_commit, end_commit=args.end_commit
)
info = {"test": test, "commit": commit, "status": status}
if commit in commit_info_cache:
commit_info = commit_info_cache[commit]

View File

@@ -26,6 +26,33 @@ if __name__ == "__main__":
job_name = os.environ.get("JOB_NAME")
# Upload to Hub and get the url
# if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
report_repo_subfolder = ""
if os.getenv("GITHUB_EVENT_NAME") != "schedule":
report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
report_repo_subfolder = f"runs/{report_repo_subfolder}"
workflow_run = get_last_daily_ci_run(
token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
)
workflow_run_created_time = workflow_run["created_at"]
report_repo_folder = workflow_run_created_time.split("T")[0]
if report_repo_subfolder:
report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
report_repo_id = os.getenv("REPORT_REPO_ID")
commit_info = api.upload_file(
path_or_fileobj="new_failures_with_bad_commit.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/new_failures_with_bad_commit.json",
repo_id=report_repo_id,
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
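
To make the destination concrete: for a hypothetical non-scheduled run with `GITHUB_RUN_NUMBER=123`, `GITHUB_RUN_ID=18548615847`, job `run_models_gpu`, and a workflow `created_at` of `2025-10-17T02:17:00Z`, the file would land in the dataset at:

2025-10-17/runs/123-18548615847/ci_results_run_models_gpu/new_failures_with_bad_commit.json
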
with open("new_failures_with_bad_commit.json") as fp:
data = json.load(fp)
@@ -88,25 +115,6 @@ if __name__ == "__main__":
_data[model] = {k: v for k, v in model_result.items() if len(v) > 0}
new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}
# Upload to Hub and get the url
# if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
report_repo_subfolder = ""
if os.getenv("GITHUB_EVENT_NAME") != "schedule":
report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
report_repo_subfolder = f"runs/{report_repo_subfolder}"
workflow_run = get_last_daily_ci_run(
token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
)
workflow_run_created_time = workflow_run["created_at"]
report_repo_folder = workflow_run_created_time.split("T")[0]
if report_repo_subfolder:
report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
report_repo_id = os.getenv("REPORT_REPO_ID")
with open("new_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
commit_info = api.upload_file(

View File

@@ -74,4 +74,5 @@ if __name__ == "__main__":
end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0)
model_splits.append(d[start:end])
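# NOTE: temporary override for this test branch, shrinking the matrix to just ViT and CLIP.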
model_splits = [["models/vit"], ["models/clip"]]
print(model_splits)