Compare commits

...

5 Commits

SHA1        Message             Date
ba92002928  CI with torch 2.7   2025-04-03 21:51:03 +02:00
ee25237a2f  try                 2025-04-03 21:37:38 +02:00
246db22767  try                 2025-04-03 21:12:23 +02:00
b6328584bf  try                 2025-04-03 20:50:37 +02:00
98adb0d92e  try                 2025-04-03 20:24:28 +02:00
8 changed files with 102 additions and 91 deletions

View File

@@ -71,13 +71,13 @@ jobs:
       - name: Check failed tests
         working-directory: /transformers
-        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
+        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures_temp.json --output_file new_model_failures_with_bad_commit_temp.json

       - name: Show results
         working-directory: /transformers
         run: |
-          ls -l new_model_failures_with_bad_commit.json
-          cat new_model_failures_with_bad_commit.json
+          ls -l new_model_failures_with_bad_commit_temp.json
+          cat new_model_failures_with_bad_commit_temp.json

       - name: Checkout back
         working-directory: /transformers

View File

@@ -93,6 +93,10 @@ jobs:
         run: |
           python3 utils/print_env.py

+      - name: Installed torch 2.7 RC
+        working-directory: /transformers
+        run: python3 -m pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
+
       - name: Show installed libraries and their versions
         working-directory: /transformers
         run: pip freeze
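This added step installs the torch 2.7.0 release candidate from the cu126 test wheel index on top of the image's preinstalled torch. A quick sanity check along these lines (a sketch, not part of the diff; run inside the same container) would confirm the RC build actually took effect:

import torch

# Expect a 2.7.0 release-candidate build compiled against CUDA 12.6,
# matching the cu126 test index used in the workflow step above.
print(torch.__version__)
print(torch.version.cuda)
assert torch.__version__.startswith("2.7.0"), "torch 2.7 RC not installed"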

View File

@@ -2,12 +2,12 @@ name: Self-hosted runner (scheduled)

 on:
-  repository_dispatch:
-  schedule:
-    - cron: "17 2 * * *"
+  # repository_dispatch:
+  # schedule:
+  #   - cron: "17 2 * * *"
   push:
     branches:
-      - run_scheduled_ci*
+      - ci_with_torch_2.7

 jobs:
   model-ci:
@@ -20,59 +20,59 @@ jobs:
       docker: huggingface/transformers-all-latest-gpu
       ci_event: Daily CI
     secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-pipeline-torch"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-gpu
-      ci_event: Daily CI
-    secrets: inherit
+  #
+  # torch-pipeline:
+  #   name: Torch pipeline CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_pipelines_torch_gpu
+  #     slack_report_channel: "#transformers-ci-daily-pipeline-torch"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-pytorch-gpu
+  #     ci_event: Daily CI
+  #   secrets: inherit
-
-  tf-pipeline:
-    name: TF pipeline CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_pipelines_tf_gpu
-      slack_report_channel: "#transformers-ci-daily-pipeline-tf"
-      runner: daily-ci
-      docker: huggingface/transformers-tensorflow-gpu
-      ci_event: Daily CI
-    secrets: inherit
+  #
+  # tf-pipeline:
+  #   name: TF pipeline CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_pipelines_tf_gpu
+  #     slack_report_channel: "#transformers-ci-daily-pipeline-tf"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-tensorflow-gpu
+  #     ci_event: Daily CI
+  #   secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-examples"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
-    secrets: inherit
+  #
+  # example-ci:
+  #   name: Example CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_examples_gpu
+  #     slack_report_channel: "#transformers-ci-daily-examples"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-all-latest-gpu
+  #     ci_event: Daily CI
+  #   secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-deepspeed"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
-      ci_event: Daily CI
-      working-directory-prefix: /workspace
-    secrets: inherit
+  #
+  # deepspeed-ci:
+  #   name: DeepSpeed CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_torch_cuda_extensions_gpu
+  #     slack_report_channel: "#transformers-ci-daily-deepspeed"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
+  #     ci_event: Daily CI
+  #     working-directory-prefix: /workspace
+  #   secrets: inherit
-
-  quantization-ci:
-    name: Quantization CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_quantization_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-quantization"
-      runner: daily-ci
-      docker: huggingface/transformers-quantization-latest-gpu
-      ci_event: Daily CI
-    secrets: inherit
+  #
+  # quantization-ci:
+  #   name: Quantization CI
+  #   uses: ./.github/workflows/self-scheduled.yml
+  #   with:
+  #     job: run_quantization_torch_gpu
+  #     slack_report_channel: "#transformers-ci-daily-quantization"
+  #     runner: daily-ci
+  #     docker: huggingface/transformers-quantization-latest-gpu
+  #     ci_event: Daily CI
+  #   secrets: inherit

View File

@@ -49,6 +49,9 @@ if len(result.stderr) > 0:
     if "ERROR: file or directory not found: " in result.stderr:
         print("test not found in this commit")
         exit(0)
+    elif "ERROR: not found: " in result.stderr:
+        print("test not found in this commit")
+        exit(0)
     else:
        print(f"pytest failed to run: {{result.stderr}}")
        exit(-1)
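Both error strings map to the same outcome: "ERROR: file or directory not found: " is what pytest prints when the test file itself is absent at the checked-out commit, while "ERROR: not found: " is the variant printed when the file exists but the requested test node id does not. Either way the target test does not exist at that commit, so the bisection should skip it rather than abort. (The doubled braces in {{result.stderr}} suggest this block lives inside a larger format-string template, where {{ renders as a literal {.) A standalone sketch of the same branching, using a hypothetical test node id:

import subprocess

# Hypothetical node id; the real script substitutes the failing test it is bisecting.
result = subprocess.run(
    ["python3", "-m", "pytest", "tests/models/vit/test_modeling_vit.py::NoSuchTest"],
    capture_output=True,
    text=True,
)
if len(result.stderr) > 0:
    if "ERROR: file or directory not found: " in result.stderr:
        print("test not found in this commit")  # test file missing at this commit
    elif "ERROR: not found: " in result.stderr:
        print("test not found in this commit")  # file exists, node id does not
    else:
        print(f"pytest failed to run: {result.stderr}")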

View File

@@ -31,6 +31,7 @@ def get_daily_ci_runs(token, num_runs=7):

 def get_last_daily_ci_runs(token):
     """Get the last completed workflow run id of the scheduled (daily) CI."""
+    return "14233781160"
     workflow_runs = get_daily_ci_runs(token)
     workflow_run_id = None
     for workflow_run in workflow_runs:

View File

@@ -523,20 +523,20 @@ class Message:
         extra_blocks = self.get_new_model_failure_blocks(to_truncate=False)
         if extra_blocks:
             failure_text = extra_blocks[-1]["text"]["text"]
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.txt")
             with open(file_path, "w", encoding="UTF-8") as fp:
                 fp.write(failure_text)

             # upload results to Hub dataset
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.txt")
             commit_info = api.upload_file(
                 path_or_fileobj=file_path,
-                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt",
+                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.txt",
                 repo_id="hf-internal-testing/transformers_daily_ci",
                 repo_type="dataset",
                 token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
             )
-            url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt"
+            url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.txt"

             # extra processing to save to json format
             new_failed_tests = {}
@@ -550,15 +550,15 @@ class Message:
                     new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
                 for url, device in items:
                     new_failed_tests[model][f"{device}-gpu"].append(line)
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.json")
             with open(file_path, "w", encoding="UTF-8") as fp:
                 json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)

             # upload results to Hub dataset
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.json")
             _ = api.upload_file(
                 path_or_fileobj=file_path,
-                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json",
+                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.json",
                 repo_id="hf-internal-testing/transformers_daily_ci",
                 repo_type="dataset",
                 token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
@@ -1220,6 +1220,7 @@ if __name__ == "__main__":
     target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main"
     is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow
+    is_scheduled_ci_run = True

     # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as
     # results.
@@ -1228,14 +1229,14 @@ if __name__ == "__main__":
            json.dump(model_results, fp, indent=4, ensure_ascii=False)

        # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-        if is_scheduled_ci_run:
-            api.upload_file(
-                path_or_fileobj=f"ci_results_{job_name}/model_results.json",
-                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json",
-                repo_id="hf-internal-testing/transformers_daily_ci",
-                repo_type="dataset",
-                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
-            )
+        # if is_scheduled_ci_run:
+        #     api.upload_file(
+        #         path_or_fileobj=f"ci_results_{job_name}/model_results.json",
+        #         path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json",
+        #         repo_id="hf-internal-testing/transformers_daily_ci",
+        #         repo_type="dataset",
+        #         token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+        #     )

        # Must have the same keys as in `additional_results`.
        # The values are used as the file names where to save the corresponding CI job results.
@@ -1250,14 +1251,14 @@ if __name__ == "__main__":
                json.dump(job_result, fp, indent=4, ensure_ascii=False)

            # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-            if is_scheduled_ci_run:
-                api.upload_file(
-                    path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
-                    path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
-                    repo_id="hf-internal-testing/transformers_daily_ci",
-                    repo_type="dataset",
-                    token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
-                )
+            # if is_scheduled_ci_run:
+            #     api.upload_file(
+            #         path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
+            #         path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
+            #         repo_id="hf-internal-testing/transformers_daily_ci",
+            #         repo_type="dataset",
+            #         token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+            #     )

    prev_ci_artifacts = None
    if is_scheduled_ci_run:

View File

@@ -24,7 +24,7 @@ from huggingface_hub import HfApi

 if __name__ == "__main__":
     api = HfApi()

-    with open("new_model_failures_with_bad_commit.json") as fp:
+    with open("new_model_failures_with_bad_commit_temp.json") as fp:
         data = json.load(fp)

     # TODO: extend
@@ -68,16 +68,16 @@ if __name__ == "__main__":
        new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}

    # Upload to Hub and get the url
-    with open("new_model_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
+    with open("new_model_failures_with_bad_commit_grouped_by_authors_temp.json", "w") as fp:
        json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
    commit_info = api.upload_file(
-        path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors.json",
-        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
+        path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors_temp.json",
+        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors_temp.json",
        repo_id="hf-internal-testing/transformers_daily_ci",
        repo_type="dataset",
        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
    )
-    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"
+    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors_temp.json"

    # Add `GH_` prefix as keyword mention
    output = {}

View File

@@ -62,4 +62,6 @@ if __name__ == "__main__":
         start = end
         end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0)
         model_splits.append(d[start:end])
+
+    # model_splits = [["models/vit"]]
     print(model_splits)
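The loop above hands out num_jobs // num_splits jobs per split and lets the first num_jobs % num_splits splits absorb one extra job each, so split sizes differ by at most one. A minimal self-contained sketch with assumed inputs (the real script takes the job list from test collection and --num_splits from the CLI):

# Hypothetical inputs: 10 jobs over 3 splits -> chunk sizes 4, 3, 3.
d = [f"models/m{i}" for i in range(10)]
num_splits = 3
num_jobs = len(d)
num_jobs_per_splits = num_jobs // num_splits

model_splits, end = [], 0
for idx in range(num_splits):
    start = end
    # The first (num_jobs % num_splits) splits each take one extra job.
    end = start + num_jobs_per_splits + (1 if idx < num_jobs % num_splits else 0)
    model_splits.append(d[start:end])

print([len(s) for s in model_splits])  # [4, 3, 3]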