Compare commits

...

5 Commits

Author SHA1 Message Date
ba92002928 CI with torch 2.7 2025-04-03 21:51:03 +02:00
ee25237a2f try 2025-04-03 21:37:38 +02:00
246db22767 try 2025-04-03 21:12:23 +02:00
b6328584bf try 2025-04-03 20:50:37 +02:00
98adb0d92e try 2025-04-03 20:24:28 +02:00
8 changed files with 102 additions and 91 deletions

View File

@ -71,13 +71,13 @@ jobs:
- name: Check failed tests
working-directory: /transformers
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures_temp.json --output_file new_model_failures_with_bad_commit_temp.json
- name: Show results
working-directory: /transformers
run: |
ls -l new_model_failures_with_bad_commit.json
cat new_model_failures_with_bad_commit.json
ls -l new_model_failures_with_bad_commit_temp.json
cat new_model_failures_with_bad_commit_temp.json
- name: Checkout back
working-directory: /transformers

View File

@ -93,6 +93,10 @@ jobs:
run: |
python3 utils/print_env.py
- name: Installed torch 2.7 RC
working-directory: /transformers
run: python3 -m pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze

View File

@ -2,12 +2,12 @@ name: Self-hosted runner (scheduled)
on:
repository_dispatch:
schedule:
- cron: "17 2 * * *"
# repository_dispatch:
# schedule:
# - cron: "17 2 * * *"
push:
branches:
- run_scheduled_ci*
- ci_with_torch_2.7
jobs:
model-ci:
@ -20,59 +20,59 @@ jobs:
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
runner: daily-ci
docker: huggingface/transformers-pytorch-gpu
ci_event: Daily CI
secrets: inherit
tf-pipeline:
name: TF pipeline CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_pipelines_tf_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-tf"
runner: daily-ci
docker: huggingface/transformers-tensorflow-gpu
ci_event: Daily CI
secrets: inherit
example-ci:
name: Example CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-examples"
runner: daily-ci
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-deepspeed"
runner: daily-ci
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
ci_event: Daily CI
working-directory-prefix: /workspace
secrets: inherit
quantization-ci:
name: Quantization CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_quantization_torch_gpu
slack_report_channel: "#transformers-ci-daily-quantization"
runner: daily-ci
docker: huggingface/transformers-quantization-latest-gpu
ci_event: Daily CI
secrets: inherit
#
# torch-pipeline:
# name: Torch pipeline CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_pipelines_torch_gpu
# slack_report_channel: "#transformers-ci-daily-pipeline-torch"
# runner: daily-ci
# docker: huggingface/transformers-pytorch-gpu
# ci_event: Daily CI
# secrets: inherit
#
# tf-pipeline:
# name: TF pipeline CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_pipelines_tf_gpu
# slack_report_channel: "#transformers-ci-daily-pipeline-tf"
# runner: daily-ci
# docker: huggingface/transformers-tensorflow-gpu
# ci_event: Daily CI
# secrets: inherit
#
# example-ci:
# name: Example CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_examples_gpu
# slack_report_channel: "#transformers-ci-daily-examples"
# runner: daily-ci
# docker: huggingface/transformers-all-latest-gpu
# ci_event: Daily CI
# secrets: inherit
#
# deepspeed-ci:
# name: DeepSpeed CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_torch_cuda_extensions_gpu
# slack_report_channel: "#transformers-ci-daily-deepspeed"
# runner: daily-ci
# docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
# ci_event: Daily CI
# working-directory-prefix: /workspace
# secrets: inherit
#
# quantization-ci:
# name: Quantization CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_quantization_torch_gpu
# slack_report_channel: "#transformers-ci-daily-quantization"
# runner: daily-ci
# docker: huggingface/transformers-quantization-latest-gpu
# ci_event: Daily CI
# secrets: inherit

View File

@ -49,6 +49,9 @@ if len(result.stderr) > 0:
if "ERROR: file or directory not found: " in result.stderr:
print("test not found in this commit")
exit(0)
elif "ERROR: not found: " in result.stderr:
print("test not found in this commit")
exit(0)
else:
print(f"pytest failed to run: {{result.stderr}}")
exit(-1)

View File

@ -31,6 +31,7 @@ def get_daily_ci_runs(token, num_runs=7):
def get_last_daily_ci_runs(token):
"""Get the last completed workflow run id of the scheduled (daily) CI."""
return "14233781160"
workflow_runs = get_daily_ci_runs(token)
workflow_run_id = None
for workflow_run in workflow_runs:

View File

@ -523,20 +523,20 @@ class Message:
extra_blocks = self.get_new_model_failure_blocks(to_truncate=False)
if extra_blocks:
failure_text = extra_blocks[-1]["text"]["text"]
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.txt")
with open(file_path, "w", encoding="UTF-8") as fp:
fp.write(failure_text)
# upload results to Hub dataset
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.txt")
commit_info = api.upload_file(
path_or_fileobj=file_path,
path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt",
path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.txt",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt"
url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.txt"
# extra processing to save to json format
new_failed_tests = {}
@ -550,15 +550,15 @@ class Message:
new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
for url, device in items:
new_failed_tests[model][f"{device}-gpu"].append(line)
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.json")
with open(file_path, "w", encoding="UTF-8") as fp:
json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)
# upload results to Hub dataset
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures_temp.json")
_ = api.upload_file(
path_or_fileobj=file_path,
path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json",
path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures_temp.json",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
@ -1220,6 +1220,7 @@ if __name__ == "__main__":
target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main"
is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow
is_scheduled_ci_run = True
# Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as
# results.
@ -1228,14 +1229,14 @@ if __name__ == "__main__":
json.dump(model_results, fp, indent=4, ensure_ascii=False)
# upload results to Hub dataset (only for the scheduled daily CI run on `main`)
if is_scheduled_ci_run:
api.upload_file(
path_or_fileobj=f"ci_results_{job_name}/model_results.json",
path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
# if is_scheduled_ci_run:
# api.upload_file(
# path_or_fileobj=f"ci_results_{job_name}/model_results.json",
# path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json",
# repo_id="hf-internal-testing/transformers_daily_ci",
# repo_type="dataset",
# token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
# )
# Must have the same keys as in `additional_results`.
# The values are used as the file names where to save the corresponding CI job results.
@ -1250,14 +1251,14 @@ if __name__ == "__main__":
json.dump(job_result, fp, indent=4, ensure_ascii=False)
# upload results to Hub dataset (only for the scheduled daily CI run on `main`)
if is_scheduled_ci_run:
api.upload_file(
path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
# if is_scheduled_ci_run:
# api.upload_file(
# path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
# path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
# repo_id="hf-internal-testing/transformers_daily_ci",
# repo_type="dataset",
# token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
# )
prev_ci_artifacts = None
if is_scheduled_ci_run:

View File

@ -24,7 +24,7 @@ from huggingface_hub import HfApi
if __name__ == "__main__":
api = HfApi()
with open("new_model_failures_with_bad_commit.json") as fp:
with open("new_model_failures_with_bad_commit_temp.json") as fp:
data = json.load(fp)
# TODO: extend
@ -68,16 +68,16 @@ if __name__ == "__main__":
new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}
# Upload to Hub and get the url
with open("new_model_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
with open("new_model_failures_with_bad_commit_grouped_by_authors_temp.json", "w") as fp:
json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
commit_info = api.upload_file(
path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors.json",
path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors_temp.json",
path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors_temp.json",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"
url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors_temp.json"
# Add `GH_` prefix as keyword mention
output = {}

View File

@ -62,4 +62,6 @@ if __name__ == "__main__":
start = end
end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0)
model_splits.append(d[start:end])
# model_splits = [["models/vit"]]
print(model_splits)