check 1

2025-10-20 17:13:56 +08:00 · 2025-10-17 06:57:10 +02:00 · 2025-10-17 06:28:55 +02:00 · 2025-10-17 03:03:25 +02:00
2 changed files with 57 additions and 9 deletions
--- a/.github/workflows/check_failed_tests.yml
+++ b/.github/workflows/check_failed_tests.yml
@ -45,7 +45,6 @@ jobs:
    strategy:
      matrix:
        run_idx: [1, 2, 3]
-        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
    runs-on:
      group: aws-g5-4xlarge-cache
    container:
@ -122,6 +121,10 @@ jobs:
        run: |
          python3 utils/print_env.py

+      - name: Install pytest-flakefinder
+        if: ${{ env.process == 'true' }}
+        run: python3 -m pip install pytest-flakefinder
+
      - name: Show installed libraries and their versions
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
@ -147,6 +150,7 @@ jobs:

  process_new_failures_with_commit_info:
    name: "process bad commit reports"
+    needs: [check_new_failures]
    runs-on:
      group: aws-g5-4xlarge-cache
    container:
--- a/utils/check_bad_commit.py
+++ b/utils/check_bad_commit.py
@ -15,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import argparse
+import git
 import json
 import os
 import re
@ -38,8 +39,14 @@ def create_script(target_test):
 import os
 import subprocess

+_ = subprocess.run(
+    ["python3", "-m", "pip", "install", "-e", "."],
+    capture_output = True,
+    text=True,
+)
+
 result = subprocess.run(
-    ["python3", "-m", "pytest", "-v", "-rfEp", f"{target_test}"],
+    ["python3", "-m", "pytest", "-v", "--flake-finder", "--flake-runs=4", "-rfEp", f"{target_test}"],
    capture_output = True,
    text=True,
 )
@ -47,14 +54,14 @@ print(result.stdout)

 if f"FAILED {target_test}" in result.stdout:
    print("test failed")
-    exit(2)
+    exit(1)
 elif result.returncode != 0:
    if "ERROR: file or directory not found: " in result.stderr:
        print("test file or directory not found in this commit")
-        exit(0)
+        exit(125)
    elif "ERROR: not found: " in result.stderr:
        print("test not found in this commit")
-        exit(0)
+        exit(125)
    else:
        print(f"pytest gets unknown error: {{result.stderr}}")
        exit(-1)
@ -67,20 +74,57 @@ exit(0)
        fp.write(script.strip())


+def is_bad_commit(target_test, commit):
+    """Check whether `target_test` fails at `commit`.
+
+    Checks out `commit` in the repository located at the current directory, generates `target_script.py` with
+    `create_script`, runs it, and finally checks out the commit that was at HEAD before the call.
+
+    Args:
+        target_test (`str`): The test to check.
+        commit (`str`): The commit at which the test is run.
+
+    Returns:
+        `bool`: `True` if `target_script.py` exits with a non-zero return code, `False` otherwise.
+    """
+    repo = git.Repo(".")  # or specify path to your repo
+
+    # Save the commit currently at HEAD so it can be restored afterwards
+    original_head = repo.head.commit
+
+    # Checkout to the commit
+    repo.git.checkout(commit)
+
+    # Always restore the original commit, even if creating or running the script raises. Otherwise the
+    # working tree would stay at `commit` and any later check would run against the wrong code.
+    try:
+        create_script(target_test=target_test)
+
+        result = subprocess.run(
+            ["python3", "target_script.py"],
+            capture_output=True,
+            text=True,
+        )
+    finally:
+        repo.git.checkout(original_head)
+
+    # Any non-zero return code counts as bad. With the generated script this includes 1 (test failed) and
+    # 125 (test or test file not found in this commit), not only genuine test failures.
+    return result.returncode != 0
+
+
 def find_bad_commit(target_test, start_commit, end_commit):
-    """Find (backward) the earliest commit between `start_commit` and `end_commit` at which `target_test` fails.
+    """Find (backward) the earliest commit between `start_commit` (inclusive) and `end_commit` (exclusive) at which `target_test` fails.

    Args:
        target_test (`str`): The test to check.
-        start_commit (`str`): The latest commit.
-        end_commit (`str`): The earliest commit.
+        start_commit (`str`): The latest commit (inclusive).
+        end_commit (`str`): The earliest commit (exclusive).

    Returns:
        `str`: The earliest commit at which `target_test` fails.
    """

+    # check if `end_commit` fails the test
+    failed_before = is_bad_commit(target_test, end_commit)
+    if failed_before:
+        return None
+
+    # if there is no new commit (e.g. 2 different CI runs on the same commit):
+    #   - failed once on `start_commit` but passed on `end_commit`, which are the same commit --> flaky (or something changed externally) --> don't report
    if start_commit == end_commit:
-        return start_commit
+        return None
+
+    # Now, we are (almost) sure `target_test` is not failing at `end_commit`
+    # check if `start_commit` fails the test
+    failed_now = is_bad_commit(target_test, start_commit)
+    if not failed_now:
+        # failed on CI run, but not reproducible here --> don't report
+        return None

    create_script(target_test=target_test)
Author	SHA1	Message	Date
ydshieh	740f952218	check 1	2025-10-17 06:57:10 +02:00
ydshieh	950c4e5303	check 1	2025-10-17 06:28:55 +02:00
ydshieh	89970f4797	check 1	2025-10-17 03:03:25 +02:00