further improve utils/check_bad_commit.py (#41658) (#41690)

* fix

* Update utils/check_bad_commit.py

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
Yih-Dar committed (via GitHub) 2025-10-17 23:07:00 +02:00
parent 448c553ccb
commit 307c523854
3 changed files with 156 additions and 43 deletions

File 1 of 3 (GitHub Actions workflow)

@@ -41,9 +41,14 @@ env:
jobs:
check_new_failures:
name: " "
name: "Find commits for new failing tests"
strategy:
matrix:
run_idx: [1]
runs-on:
group: aws-g5-4xlarge-cache
outputs:
process: ${{ steps.check_file.outputs.process }}
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -54,14 +59,17 @@ jobs:
path: /transformers/ci_results_${{ inputs.job }}
- name: Check file
id: check_file
working-directory: /transformers
run: |
if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
echo "`ci_results_${{ inputs.job }}/new_failures.json` exists, continue ..."
echo "process=true" >> $GITHUB_ENV
echo "process=true" >> $GITHUB_OUTPUT
else
echo "`ci_results_${{ inputs.job }}/new_failures.json` doesn't exist, abort."
echo "process=false" >> $GITHUB_ENV
echo "process=false" >> $GITHUB_OUTPUT
fi
- uses: actions/download-artifact@v4
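
The hunk above writes `process` to both `$GITHUB_ENV` (readable by later steps in the same job as `env.process`) and `$GITHUB_OUTPUT` (surfaced through `steps.check_file.outputs.process` into the job-level `outputs.process` that the new downstream job consumes). A minimal Python sketch of the same mechanism, outside this diff; the JSON path is a placeholder:

import os

# Append `key=value` lines to the files GitHub Actions exposes via $GITHUB_ENV
# (visible to later steps as environment variables) and $GITHUB_OUTPUT
# (visible to other jobs through the step's declared outputs).
process = "true" if os.path.isfile("ci_results_example/new_failures.json") else "false"
for var in ("GITHUB_ENV", "GITHUB_OUTPUT"):
    path = os.environ.get(var)
    if path:  # only set inside an Actions runner
        with open(path, "a", encoding="utf-8") as fp:
            fp.write(f"process={process}\n")
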
@@ -118,6 +126,10 @@ jobs:
run: |
python3 utils/print_env.py
- name: Install pytest-flakefinder
if: ${{ env.process == 'true' }}
run: python3 -m pip install pytest-flakefinder
- name: Show installed libraries and their versions
working-directory: /transformers
if: ${{ env.process == 'true' }}
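
pytest-flakefinder, installed a few steps above, multiplies every collected test: with `--flake-runs=4` each test executes four times in one session, so a flaky test that fails any repetition still emits a `FAILED` line for the bisect script to detect. A minimal local sketch; the test id is a placeholder:

import subprocess

# Run a single test four times in one pytest session; any failing repetition
# prints a "FAILED <test id>" line in stdout.
result = subprocess.run(
    ["python3", "-m", "pytest", "-v", "--flake-finder", "--flake-runs=4", "-rfEp",
     "tests/test_example.py::test_dummy"],  # placeholder test id
    capture_output=True,
    text=True,
)
print(result.stdout)
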
@@ -126,25 +138,63 @@ jobs:
- name: Check failed tests
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit.json
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
- name: Show results
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
ls -l new_failures_with_bad_commit.json
cat new_failures_with_bad_commit.json
ls -l new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
cat new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
- name: Checkout back
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}
path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
process_new_failures_with_commit_info:
name: "process bad commit reports"
needs: check_new_failures
if: needs.check_new_failures.outputs.process == 'true'
runs-on:
group: aws-g5-4xlarge-cache
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- uses: actions/download-artifact@v4
with:
name: ci_results_${{ inputs.job }}
path: /transformers/ci_results_${{ inputs.job }}
- uses: actions/download-artifact@v4
with:
pattern: new_failures_with_bad_commit_${{ inputs.job }}*
path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}
merge-multiple: true
- name: Check files
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
git checkout ${{ inputs.start_sha }}
ls -la /transformers
ls -la /transformers/new_failures_with_bad_commit_${{ inputs.job }}
# Currently, we only run with a single runner by using `run_idx: [1]`. We might try to run with multiple runners
# to further reduce false positives caused by flaky tests, which requires further processing to merge reports (a possible merge is sketched below).
- name: Merge files
shell: bash
working-directory: /transformers
run: |
cp /transformers/new_failures_with_bad_commit_${{ inputs.job }}/new_failures_with_bad_commit_${{ inputs.job }}_1.json new_failures_with_bad_commit.json
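
With `run_idx: [1]` the merge is a plain copy of the single report. If several runners were used, as the comment above anticipates, a merge might keep only tests whose blamed commit agrees across runs. A hypothetical sketch; the intersection policy and the flat list-of-entries shape ({"test": ..., "commit": ..., "status": ...}) are assumptions, not part of this commit:

import glob
import json

# Hypothetical merge: report a test only when every run blames the same commit,
# filtering out one-off flaky attributions.
runs = []
for path in sorted(glob.glob("new_failures_with_bad_commit_*/new_failures_with_bad_commit_*_*.json")):
    with open(path, encoding="utf-8") as fp:
        runs.append({entry["test"]: entry for entry in json.load(fp)})

if runs:
    merged = [
        entry
        for test, entry in runs[0].items()
        if all(test in run and run[test]["commit"] == entry["commit"] for run in runs[1:])
    ]
    with open("new_failures_with_bad_commit.json", "w", encoding="utf-8") as fp:
        json.dump(merged, fp, indent=4)
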
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
- name: Process report
shell: bash
working-directory: /transformers
if: ${{ env.process == 'true' }}
env:
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@@ -156,7 +206,6 @@ jobs:
- name: Process report
shell: bash
working-directory: /transformers
if: ${{ env.process == 'true' }}
env:
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@@ -171,13 +220,12 @@ jobs:
- name: Prepare Slack report title
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
pip install slack_sdk
echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
- name: Send processed report
if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
with:
# Slack channel id, channel name, or user id to post message.
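
The inline `python3 -c` in the "Prepare Slack report title" step above is easier to read unrolled. An equivalent sketch, assuming it runs from the repository root; the `ci_event` and `job` values are placeholders for the workflow inputs:

import sys

sys.path.append("utils")
from utils.notification_service import job_to_test_map  # maps a CI job name to its test-suite label

ci_event = "Daily CI"   # placeholder for ${{ inputs.ci_event }}
job = "run_models_gpu"  # placeholder for ${{ inputs.job }}
title = f"New failed tests of {ci_event}: {job_to_test_map[job]}"
print(title)
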

File 2 of 3: utils/check_bad_commit.py

@@ -20,6 +20,7 @@ import os
import re
import subprocess
import git
import requests
@@ -38,8 +39,14 @@ def create_script(target_test):
import os
import subprocess
_ = subprocess.run(
["python3", "-m", "pip", "install", "-e", "."],
capture_output = True,
text=True,
)
result = subprocess.run(
["python3", "-m", "pytest", "-v", "-rfEp", f"{target_test}"],
["python3", "-m", "pytest", "-v", "--flake-finder", "--flake-runs=4", "-rfEp", f"{target_test}"],
capture_output = True,
text=True,
)
@@ -47,17 +54,20 @@ print(result.stdout)
if f"FAILED {target_test}" in result.stdout:
print("test failed")
exit(2)
exit(1)
elif result.returncode != 0:
if "ERROR: file or directory not found: " in result.stderr:
print("test file or directory not found in this commit")
# git bisect treats exit code 125 as `test not found`. But that prevents it from reaching a conclusion
# when a test is added between the `good` commit (exclusive) and the `bad` commit (inclusive) (in git bisect terminology).
# So we return 0 here so the process can still identify the first commit at which the test fails.
exit(0)
elif "ERROR: not found: " in result.stderr:
print("test not found in this commit")
exit(0)
else:
print(f"pytest gets unknown error: {{result.stderr}}")
exit(-1)
exit(1)
print(f"pytest runs successfully.")
exit(0)
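
`git bisect run` classifies each commit by the helper's exit status: 0 means good, 125 means "cannot test, skip this commit", 1-127 (except 125) mean bad, and anything above 127 aborts the bisection. That is why the generated script above maps "test not found" to 0 rather than 125. A small sketch of the contract, independent of any repository:

def classify(exit_status: int) -> str:
    """How `git bisect run` interprets the test command's exit status."""
    if exit_status == 0:
        return "good"
    if exit_status == 125:
        return "skip (commit cannot be tested)"
    if 1 <= exit_status <= 127:
        return "bad"
    return "abort (bisection stops)"

# The generated script exits 0 when the test does not exist at a commit, so
# commits predating the test count as "good" and bisect can still converge on
# the first commit at which the test fails.
for status in (0, 1, 125, 128):
    print(status, "->", classify(status))
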
@@ -67,20 +77,63 @@ exit(0)
fp.write(script.strip())
def is_bad_commit(target_test, commit):
repo = git.Repo(".") # or specify path to your repo
# Save the current HEAD reference
original_head = repo.head.commit
# Checkout to the commit
repo.git.checkout(commit)
create_script(target_test=target_test)
result = subprocess.run(
["python3", "target_script.py"],
capture_output=True,
text=True,
)
# Restore to original commit
repo.git.checkout(original_head)
return result.returncode != 0
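
`is_bad_commit` temporarily checks out the candidate commit, runs the generated `target_script.py`, and then restores the previously checked-out commit. A usage sketch, assuming it is called from the repository root; the commit hash and test id are placeholders:

# Hypothetical call; in this file, real calls happen inside `find_bad_commit`.
target = "tests/test_example.py::test_dummy"
if is_bad_commit(target_test=target, commit="307c523854"):
    print(f"{target} fails (or pytest hit an unknown error) at this commit")
else:
    print(f"{target} passes at this commit")
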
def find_bad_commit(target_test, start_commit, end_commit):
"""Find (backward) the earliest commit between `start_commit` and `end_commit` at which `target_test` fails.
"""Find (backward) the earliest commit between `start_commit` (inclusive) and `end_commit` (exclusive) at which `target_test` fails.
Args:
target_test (`str`): The test to check.
start_commit (`str`): The latest commit.
end_commit (`str`): The earliest commit.
start_commit (`str`): The latest commit (inclusive).
end_commit (`str`): The earliest commit (exclusive).
Returns:
`str`: The earliest commit at which `target_test` fails.
"""
# check if `end_commit` fails the test
failed_before = is_bad_commit(target_test, end_commit)
if failed_before:
return (
None,
f"flaky: test passed in the previous run (commit: {end_commit}) but failed (on the same commit) during the check of the current run.",
)
# if there is no new commit (e.g. 2 different CI runs on the same commit):
# - failed once on `start_commit` but passed on `end_commit`, which are the same commit --> flaky (or something changed externally) --> don't report
if start_commit == end_commit:
return start_commit
return (
None,
f"flaky: test fails on the current CI run but passed in the previous run which is running on the same commit {end_commit}.",
)
# Now, we are (almost) sure `target_test` is not failing at `end_commit`
# check if `start_commit` fails the test
failed_now = is_bad_commit(target_test, start_commit)
if not failed_now:
# failed on CI run, but not reproducible here --> don't report
return None, f"flaky: test fails on the current CI run (commit: {start_commit}) but passes during the check."
create_script(target_test=target_test)
@@ -105,7 +158,7 @@ git bisect run python3 target_script.py
if "error: bisect run failed" in result.stderr:
error_msg = f"Error when running git bisect:\nbash error: {result.stderr}\nbash output:\n{result.stdout}\nset `bad_commit` to `None`."
print(error_msg)
return None
return None, "git bisect failed"
pattern = r"(.+) is the first bad commit"
commits = re.findall(pattern, result.stdout)
@@ -117,7 +170,7 @@ git bisect run python3 target_script.py
print(f"Between `start_commit` {start_commit} and `end_commit` {end_commit}")
print(f"bad_commit: {bad_commit}\n")
return bad_commit
return bad_commit, "git bisect found the bad commit."
def get_commit_info(commit):
@@ -171,9 +224,11 @@ if __name__ == "__main__":
raise ValueError("Exactly one argument `test` or `file` must be specified.")
if args.test is not None:
commit = find_bad_commit(target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit)
commit, status = find_bad_commit(
target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit
)
with open(args.output_file, "w", encoding="UTF-8") as fp:
fp.write(f"{args.test}\n{commit}")
fp.write(f"{args.test}\n{commit}\n{status}")
elif os.path.isfile(args.file):
with open(args.file, "r", encoding="UTF-8") as fp:
reports = json.load(fp)
@@ -185,8 +240,10 @@ if __name__ == "__main__":
failed_tests_with_bad_commits = []
for test in failed_tests:
commit = find_bad_commit(target_test=test, start_commit=args.start_commit, end_commit=args.end_commit)
info = {"test": test, "commit": commit}
commit, status = find_bad_commit(
target_test=test, start_commit=args.start_commit, end_commit=args.end_commit
)
info = {"test": test, "commit": commit, "status": status}
if commit in commit_info_cache:
commit_info = commit_info_cache[commit]
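
Each entry written to the report therefore carries the new `status` string next to `commit`. An illustrative entry; every value is a placeholder:

# Illustrative shape only; test id, hash and status text are placeholders.
info = {
    "test": "tests/test_example.py::test_dummy",
    "commit": "307c523854",
    "status": "git bisect found the bad commit.",
}
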

File 3 of 3 (CI report upload script)

@@ -26,6 +26,33 @@ if __name__ == "__main__":
job_name = os.environ.get("JOB_NAME")
# Upload to Hub and get the url
# if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
report_repo_subfolder = ""
if os.getenv("GITHUB_EVENT_NAME") != "schedule":
report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
report_repo_subfolder = f"runs/{report_repo_subfolder}"
workflow_run = get_last_daily_ci_run(
token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
)
workflow_run_created_time = workflow_run["created_at"]
report_repo_folder = workflow_run_created_time.split("T")[0]
if report_repo_subfolder:
report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
report_repo_id = os.getenv("REPORT_REPO_ID")
commit_info = api.upload_file(
path_or_fileobj="new_failures_with_bad_commit.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/new_failures_with_bad_commit.json",
repo_id=report_repo_id,
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
with open("new_failures_with_bad_commit.json") as fp:
data = json.load(fp)
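
The relocated block derives the dataset folder from the workflow run's creation date, appending a `runs/<run_number>-<run_id>` subfolder for non-scheduled runs. A standalone sketch of the path logic; the environment values and timestamp are placeholders:

import os

# Placeholder values standing in for the real Actions environment.
os.environ.setdefault("GITHUB_EVENT_NAME", "workflow_dispatch")
os.environ.setdefault("GITHUB_RUN_NUMBER", "1234")
os.environ.setdefault("GITHUB_RUN_ID", "987654321")
workflow_run_created_time = "2025-10-17T21:07:00Z"  # would come from get_last_daily_ci_run(...)["created_at"]

report_repo_folder = workflow_run_created_time.split("T")[0]
if os.getenv("GITHUB_EVENT_NAME") != "schedule":
    report_repo_folder += f"/runs/{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"

print(report_repo_folder)  # e.g. 2025-10-17/runs/1234-987654321
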
@@ -88,25 +115,6 @@ if __name__ == "__main__":
_data[model] = {k: v for k, v in model_result.items() if len(v) > 0}
new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}
# Upload to Hub and get the url
# if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
report_repo_subfolder = ""
if os.getenv("GITHUB_EVENT_NAME") != "schedule":
report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
report_repo_subfolder = f"runs/{report_repo_subfolder}"
workflow_run = get_last_daily_ci_run(
token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
)
workflow_run_created_time = workflow_run["created_at"]
report_repo_folder = workflow_run_created_time.split("T")[0]
if report_repo_subfolder:
report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
report_repo_id = os.getenv("REPORT_REPO_ID")
with open("new_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
commit_info = api.upload_file(