diff --git a/.github/workflows/check_failed_tests.yml b/.github/workflows/check_failed_tests.yml
index 8382c46dd0e..aa60275b588 100644
--- a/.github/workflows/check_failed_tests.yml
+++ b/.github/workflows/check_failed_tests.yml
@@ -41,9 +41,14 @@ env:
 jobs:
   check_new_failures:
-    name: " "
+    name: "Find commits for new failing tests"
+    strategy:
+      matrix:
+        run_idx: [1]
     runs-on:
       group: aws-g5-4xlarge-cache
+    outputs:
+      process: ${{ steps.check_file.outputs.process }}
     container:
       image: ${{ inputs.docker }}
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -54,14 +59,17 @@ jobs:
           path: /transformers/ci_results_${{ inputs.job }}

       - name: Check file
+        id: check_file
         working-directory: /transformers
         run: |
           if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
             echo "`ci_results_${{ inputs.job }}/new_failures.json` exists, continue ..."
             echo "process=true" >> $GITHUB_ENV
+            echo "process=true" >> $GITHUB_OUTPUT
           else
             echo "`ci_results_${{ inputs.job }}/new_failures.json` doesn't exist, abort."
             echo "process=false" >> $GITHUB_ENV
+            echo "process=false" >> $GITHUB_OUTPUT
           fi

       - uses: actions/download-artifact@v4
@@ -118,6 +126,10 @@ jobs:
         run: |
           python3 utils/print_env.py

+      - name: Install pytest-flakefinder
+        if: ${{ env.process == 'true' }}
+        run: python3 -m pip install pytest-flakefinder
+
       - name: Show installed libraries and their versions
         working-directory: /transformers
         if: ${{ env.process == 'true' }}
@@ -126,25 +138,63 @@ jobs:
       - name: Check failed tests
         working-directory: /transformers
         if: ${{ env.process == 'true' }}
-        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit.json
+        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json

       - name: Show results
         working-directory: /transformers
         if: ${{ env.process == 'true' }}
         run: |
-          ls -l new_failures_with_bad_commit.json
-          cat new_failures_with_bad_commit.json
+          ls -l new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
+          cat new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json

-      - name: Checkout back
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}
+          path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
+
+  process_new_failures_with_commit_info:
+    name: "Process bad commit reports"
+    needs: check_new_failures
+    if: needs.check_new_failures.outputs.process == 'true'
+    runs-on:
+      group: aws-g5-4xlarge-cache
+    container:
+      image: ${{ inputs.docker }}
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: ci_results_${{ inputs.job }}
+          path: /transformers/ci_results_${{ inputs.job }}
+
+      - uses: actions/download-artifact@v4
+        with:
+          pattern: new_failures_with_bad_commit_${{ inputs.job }}*
+          path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}
+          merge-multiple: true
+
+      - name: Check files
         working-directory: /transformers
-        if: ${{ env.process == 'true' }}
         run: |
-          git checkout ${{ inputs.start_sha }}
+          ls -la /transformers
+          ls -la /transformers/new_failures_with_bad_commit_${{ inputs.job }}
+
+      # Currently, we only run with a single runner by using `run_idx: [1]`. We might try to run with multiple runners
+      # to further reduce false positives caused by flaky tests, which would require extra processing to merge the reports.
+      - name: Merge files
+        shell: bash
+        working-directory: /transformers
+        run: |
+          cp /transformers/new_failures_with_bad_commit_${{ inputs.job }}/new_failures_with_bad_commit_${{ inputs.job }}_1.json new_failures_with_bad_commit.json
+
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}

       - name: Process report
         shell: bash
         working-directory: /transformers
-        if: ${{ env.process == 'true' }}
         env:
           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
           TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@@ -156,7 +206,6 @@ jobs:
       - name: Process report
         shell: bash
         working-directory: /transformers
-        if: ${{ env.process == 'true' }}
         env:
           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
           TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@@ -171,13 +220,12 @@ jobs:

       - name: Prepare Slack report title
         working-directory: /transformers
-        if: ${{ env.process == 'true' }}
         run: |
           pip install slack_sdk
           echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV

       - name: Send processed report
-        if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
+        if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
         uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
         with:
           # Slack channel id, channel name, or user id to post message.
diff --git a/utils/check_bad_commit.py b/utils/check_bad_commit.py
index da1cdbbe75f..8c053c578d9 100644
--- a/utils/check_bad_commit.py
+++ b/utils/check_bad_commit.py
@@ -20,6 +20,7 @@
 import os
 import re
 import subprocess

+import git
 import requests
@@ -38,8 +39,14 @@ def create_script(target_test):
 import os
 import subprocess

+_ = subprocess.run(
+    ["python3", "-m", "pip", "install", "-e", "."],
+    capture_output = True,
+    text=True,
+)
+
 result = subprocess.run(
-    ["python3", "-m", "pytest", "-v", "-rfEp", f"{target_test}"],
+    ["python3", "-m", "pytest", "-v", "--flake-finder", "--flake-runs=4", "-rfEp", f"{target_test}"],
     capture_output = True,
     text=True,
 )
@@ -47,17 +54,20 @@ print(result.stdout)

 if f"FAILED {target_test}" in result.stdout:
     print("test failed")
-    exit(2)
+    exit(1)
 elif result.returncode != 0:
     if "ERROR: file or directory not found: " in result.stderr:
         print("test file or directory not found in this commit")
+        # `git bisect run` treats exit code 125 as `this commit cannot be tested` (skip). Returning 125 here would prevent it
+        # from reaching a conclusion if a test is added between the `good commit` (exclusive) and `bad commit` (inclusive)
+        # (in git bisect terminology). So we return 0 instead, so that the process can still identify the first failing commit.
         exit(0)
     elif "ERROR: not found: " in result.stderr:
         print("test not found in this commit")
         exit(0)
     else:
         print(f"pytest gets unknown error: {{result.stderr}}")
-        exit(-1)
+        exit(1)

 print(f"pytest runs successfully.")
 exit(0)
@@ -67,20 +77,63 @@ exit(0)
     fp.write(script.strip())


+def is_bad_commit(target_test, commit):
+    repo = git.Repo(".")  # or specify path to your repo
+
+    # Save the current HEAD reference
+    original_head = repo.head.commit
+
+    # Check out the target commit
+    repo.git.checkout(commit)
+
+    create_script(target_test=target_test)
+
+    result = subprocess.run(
+        ["python3", "target_script.py"],
+        capture_output=True,
+        text=True,
+    )
+
+    # Restore the original commit
+    repo.git.checkout(original_head)
+
+    return result.returncode != 0
+
+
 def find_bad_commit(target_test, start_commit, end_commit):
-    """Find (backward) the earliest commit between `start_commit` and `end_commit` at which `target_test` fails.
+    """Find (backward) the earliest commit between `start_commit` (inclusive) and `end_commit` (exclusive) at which `target_test` fails.

     Args:
         target_test (`str`): The test to check.
-        start_commit (`str`): The latest commit.
-        end_commit (`str`): The earliest commit.
+        start_commit (`str`): The latest commit (inclusive).
+        end_commit (`str`): The earliest commit (exclusive).

     Returns:
         `str`: The earliest commit at which `target_test` fails.
     """

+    # check if `end_commit` fails the test
+    failed_before = is_bad_commit(target_test, end_commit)
+    if failed_before:
+        return (
+            None,
+            f"flaky: test passed in the previous run (commit: {end_commit}) but failed (on the same commit) during the check of the current run.",
+        )
+
+    # if there is no new commit (e.g. 2 different CI runs on the same commit):
+    # - failed once on `start_commit` but passed on `end_commit`, which are the same commit --> flaky (or something changed externally) --> don't report
     if start_commit == end_commit:
-        return start_commit
+        return (
+            None,
+            f"flaky: test fails on the current CI run but passed in the previous run, which ran on the same commit {end_commit}.",
+        )
+
+    # Now, we are (almost) sure `target_test` is not failing at `end_commit`
+    # check if `start_commit` fails the test
+    failed_now = is_bad_commit(target_test, start_commit)
+    if not failed_now:
+        # failed on the CI run, but not reproducible here --> don't report
+        return None, f"flaky: test fails on the current CI run (commit: {start_commit}) but passes during the check."

     create_script(target_test=target_test)

@@ -105,7 +158,7 @@ git bisect run python3 target_script.py
     if "error: bisect run failed" in result.stderr:
         error_msg = f"Error when running git bisect:\nbash error: {result.stderr}\nbash output:\n{result.stdout}\nset `bad_commit` to `None`."
         print(error_msg)
-        return None
+        return None, "git bisect failed"

     pattern = r"(.+) is the first bad commit"
     commits = re.findall(pattern, result.stdout)
@@ -117,7 +170,7 @@ git bisect run python3 target_script.py
     print(f"Between `start_commit` {start_commit} and `end_commit` {end_commit}")
     print(f"bad_commit: {bad_commit}\n")

-    return bad_commit
+    return bad_commit, "git bisect found the bad commit."


 def get_commit_info(commit):
@@ -171,9 +224,11 @@ if __name__ == "__main__":
         raise ValueError("Exactly one argument `test` or `file` must be specified.")

     if args.test is not None:
-        commit = find_bad_commit(target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit)
+        commit, status = find_bad_commit(
+            target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit
+        )
         with open(args.output_file, "w", encoding="UTF-8") as fp:
-            fp.write(f"{args.test}\n{commit}")
+            fp.write(f"{args.test}\n{commit}\n{status}")
     elif os.path.isfile(args.file):
         with open(args.file, "r", encoding="UTF-8") as fp:
             reports = json.load(fp)
@@ -185,8 +240,10 @@ if __name__ == "__main__":

         failed_tests_with_bad_commits = []
         for test in failed_tests:
-            commit = find_bad_commit(target_test=test, start_commit=args.start_commit, end_commit=args.end_commit)
-            info = {"test": test, "commit": commit}
+            commit, status = find_bad_commit(
+                target_test=test, start_commit=args.start_commit, end_commit=args.end_commit
+            )
+            info = {"test": test, "commit": commit, "status": status}

             if commit in commit_info_cache:
                 commit_info = commit_info_cache[commit]
diff --git a/utils/process_bad_commit_report.py b/utils/process_bad_commit_report.py
index 2007d5348f1..9bf09825013 100644
--- a/utils/process_bad_commit_report.py
+++ b/utils/process_bad_commit_report.py
@@ -26,6 +26,33 @@ if __name__ == "__main__":

     job_name = os.environ.get("JOB_NAME")

+    # Upload to Hub and get the url
+    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
+    report_repo_subfolder = ""
+    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
+        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
+        report_repo_subfolder = f"runs/{report_repo_subfolder}"
+
+    workflow_run = get_last_daily_ci_run(
+        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
+    )
+    workflow_run_created_time = workflow_run["created_at"]
+
+    report_repo_folder = workflow_run_created_time.split("T")[0]
+
+    if report_repo_subfolder:
+        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
+
+    report_repo_id = os.getenv("REPORT_REPO_ID")
+
+    commit_info = api.upload_file(
+        path_or_fileobj="new_failures_with_bad_commit.json",
+        path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/new_failures_with_bad_commit.json",
+        repo_id=report_repo_id,
+        repo_type="dataset",
+        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+    )
+
     with open("new_failures_with_bad_commit.json") as fp:
         data = json.load(fp)
@@ -88,25 +115,6 @@ if __name__ == "__main__":
                 _data[model] = {k: v for k, v in model_result.items() if len(v) > 0}
             new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}

-    # Upload to Hub and get the url
-    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
-    report_repo_subfolder = ""
-    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
-        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
-        report_repo_subfolder = f"runs/{report_repo_subfolder}"
-
-    workflow_run = get_last_daily_ci_run(
-        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
-    )
-    workflow_run_created_time = workflow_run["created_at"]
-
-    report_repo_folder = workflow_run_created_time.split("T")[0]
-
-    if report_repo_subfolder:
-        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
-
-    report_repo_id = os.getenv("REPORT_REPO_ID")
-
     with open("new_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
         json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
     commit_info = api.upload_file(