Compare commits

..

2 Commits

5577 changed files with 701516 additions and 681968 deletions

View File

@ -7,25 +7,12 @@ parameters:
nightly:
type: boolean
default: false
GHA_Actor:
type: string
default: ""
GHA_Action:
type: string
default: ""
GHA_Event:
type: string
default: ""
GHA_Meta:
type: string
default: ""
jobs:
# Ensure running with CircleCI/huggingface
check_circleci_user:
docker:
- image: python:3.10-slim
resource_class: small
parallelism: 1
steps:
- run: echo $CIRCLE_PROJECT_USERNAME
@ -112,6 +99,8 @@ jobs:
- run:
name: "Retrieve Artifact Paths"
env:
CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
command: |
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
job_number=${CIRCLE_BUILD_NUM}
@ -156,7 +145,7 @@ jobs:
path: ~/transformers/installed.txt
- run: python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
- run: ruff check examples tests src utils
- run: ruff format examples tests src utils --check
- run: ruff format tests src utils --check
- run: python utils/custom_init_isort.py --check_only
- run: python utils/sort_auto_mappings.py --check_only
- run: python utils/check_doc_toc.py
@ -181,16 +170,17 @@ jobs:
path: ~/transformers/installed.txt
- run: python utils/check_copies.py
- run: python utils/check_modular_conversion.py
- run: python utils/check_table.py
- run: python utils/check_dummies.py
- run: python utils/check_repo.py
- run: python utils/check_inits.py
- run: python utils/check_pipeline_typing.py
- run: python utils/check_config_docstrings.py
- run: python utils/check_config_attributes.py
- run: python utils/check_doctest_list.py
- run: make deps_table_check_updated
- run: python utils/update_metadata.py --check-only
- run: python utils/check_docstrings.py
- run: python utils/check_support_list.py
workflows:
version: 2

View File

@ -16,9 +16,10 @@
import argparse
import copy
import os
import random
from dataclasses import dataclass
from typing import Any, Optional
from typing import Any, Dict, List, Optional
import glob
import yaml
@ -27,70 +28,36 @@ COMMON_ENV_VARIABLES = {
"TRANSFORMERS_IS_CI": True,
"PYTEST_TIMEOUT": 120,
"RUN_PIPELINE_TESTS": False,
# will be adjusted in `CircleCIJob.to_dict`.
"RUN_FLAKY": True,
"DISABLE_SAFETENSORS_CONVERSION": True,
"RUN_PT_TF_CROSS_TESTS": False,
"RUN_PT_FLAX_CROSS_TESTS": False,
}
# Disable the use of {"s": None} as the output is way too long, making navigation on CircleCI impractical
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None}
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf":None}
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
# Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures`
# to rerun the tests that match these patterns.
FLAKY_TEST_FAILURE_PATTERNS = [
"OSError", # Machine/connection transient error
"Timeout", # Machine/connection transient error
"ConnectionError", # Connection transient error
"FileNotFoundError", # Raised by `datasets` on Hub failures
"PIL.UnidentifiedImageError", # Raised by `PIL.Image.open` on connection issues
"HTTPError", # Also catches HfHubHTTPError
"AssertionError: Tensor-likes are not close!", # `torch.testing.assert_close`, we might have unlucky random values
# TODO: error downloading tokenizer's `merged.txt` from hub can cause all the exceptions below. Throw and handle
# them under a single message.
"TypeError: expected str, bytes or os.PathLike object, not NoneType",
"TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType",
"Converting from Tiktoken failed",
"KeyError: <class ",
"TypeError: not a string",
]
class EmptyJob:
job_name = "empty"
def to_dict(self):
steps = [{"run": 'ls -la'}]
if self.job_name == "collection_job":
steps.extend(
[
"checkout",
{"run": "pip install requests || true"},
{"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
{"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
{"store_artifacts": {"path": "outputs"}},
{"run": 'echo "All required jobs have now completed"'},
]
)
return {
"docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
"resource_class": "small",
"steps": steps,
"steps":["checkout"],
}
@dataclass
class CircleCIJob:
name: str
additional_env: dict[str, Any] = None
docker_image: list[dict[str, str]] = None
install_steps: list[str] = None
additional_env: Dict[str, Any] = None
docker_image: List[Dict[str, str]] = None
install_steps: List[str] = None
marker: Optional[str] = None
parallelism: Optional[int] = 0
pytest_num_workers: int = 8
pytest_options: dict[str, Any] = None
resource_class: Optional[str] = "xlarge"
tests_to_run: Optional[list[str]] = None
pytest_num_workers: int = 12
pytest_options: Dict[str, Any] = None
resource_class: Optional[str] = "2xlarge"
tests_to_run: Optional[List[str]] = None
num_test_files_per_worker: Optional[int] = 10
# This should be only used for doctest job!
command_timeout: Optional[int] = None
@ -109,9 +76,7 @@ class CircleCIJob:
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
print(f"Using {self.docker_image} docker image")
if self.install_steps is None:
self.install_steps = ["uv pip install ."]
# Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
self.install_steps = ["uv venv && uv pip install ."]
if self.pytest_options is None:
self.pytest_options = {}
if isinstance(self.tests_to_run, str):
@ -130,14 +95,6 @@ class CircleCIJob:
def to_dict(self):
env = COMMON_ENV_VARIABLES.copy()
if self.job_name != "tests_hub":
# fmt: off
# not critical
env.update({"HF_TOKEN": "".join(["h", "f", "_", "H", "o", "d", "V", "u", "M", "q", "b", "R", "m", "t", "b", "z", "F", "Q", "O", "Q", "A", "J", "G", "D", "l", "V", "Q", "r", "R", "N", "w", "D", "M", "V", "C", "s", "d"])})
# fmt: on
# Do not run tests decorated by @is_flaky on pull requests
env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
env.update(self.additional_env)
job = {
@ -155,9 +112,7 @@ class CircleCIJob:
# Examples special case: we need to download NLTK files in advance to avoid concurrency issues
timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
junit_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
steps = [
"checkout",
@ -178,38 +133,18 @@ class CircleCIJob:
"command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
},
{"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
{"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
{"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>>' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
{"run": {"name": "Split tests across parallel nodes: show current parallel tests",
"command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
}
},
# During the CircleCI docker images build time, we might already (or not) download the data.
# If it's done already, the files are inside the directory `/test_data/`.
{"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}},
{"run": {"name": "download and unzip hub cache", "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/'}},
{"run": {
"name": "Run tests",
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
},
{"run":
{
"name": "Check for test crashes",
"when": "always",
"command": """if [ ! -f tests_output.txt ]; then
echo "ERROR: tests_output.txt does not exist - tests may not have run properly"
exit 1
elif grep -q "crashed and worker restarting disabled" tests_output.txt; then
echo "ERROR: Worker crash detected in test output"
echo "Found: crashed and worker restarting disabled"
exit 1
else
echo "Tests output file exists and no worker crashes detected"
fi"""
},
},
{"run": {"name": "Expand to show skipped tests", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
{"run": {"name": "Failed tests: show reasons", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
{"run": {"name": "Errors", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
{"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
{"run": {"name": "Failed tests: show reasons", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
{"run": {"name": "Errors", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
{"store_test_results": {"path": "test-results"}},
{"store_artifacts": {"path": "test-results/junit.xml"}},
{"store_artifacts": {"path": "reports"}},
@ -228,79 +163,147 @@ class CircleCIJob:
# JOBS
torch_and_tf_job = CircleCIJob(
"torch_and_tf",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
additional_env={"RUN_PT_TF_CROSS_TESTS": True},
marker="is_pt_tf_cross_test",
pytest_options={"rA": None, "durations": 0},
)
torch_and_flax_job = CircleCIJob(
"torch_and_flax",
additional_env={"RUN_PT_FLAX_CROSS_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-jax-light"}],
marker="is_pt_flax_cross_test",
pytest_options={"rA": None, "durations": 0},
)
torch_job = CircleCIJob(
"torch",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate",
parallelism=6,
pytest_num_workers=8
)
generate_job = CircleCIJob(
"generate",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
# networkx==3.3 (after #36957) cause some issues
# TODO: remove this once it works directly
install_steps=["uv pip install ."],
marker="generate",
parallelism=6,
pytest_num_workers=8
)
tokenization_job = CircleCIJob(
"tokenization",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
pytest_num_workers=16
)
processor_job = CircleCIJob(
"processors",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
pytest_num_workers=6
)
tf_job = CircleCIJob(
"tf",
docker_image=[{"image":"huggingface/transformers-tf-light"}],
parallelism=6,
pytest_num_workers=16,
)
flax_job = CircleCIJob(
"flax",
docker_image=[{"image":"huggingface/transformers-jax-light"}],
parallelism=6,
pytest_num_workers=16
)
pipelines_torch_job = CircleCIJob(
"pipelines_torch",
additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}],
marker="is_pipeline_test",
parallelism=4,
parallelism=4
)
pipelines_tf_job = CircleCIJob(
"pipelines_tf",
additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-tf-light"}],
marker="is_pipeline_test",
parallelism=4
)
custom_tokenizers_job = CircleCIJob(
"custom_tokenizers",
additional_env={"RUN_CUSTOM_TOKENIZERS": True},
docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}],
)
examples_torch_job = CircleCIJob(
"examples_torch",
additional_env={"OMP_NUM_THREADS": 8},
docker_image=[{"image":"huggingface/transformers-examples-torch"}],
# TODO @ArthurZucker remove this once docker is easier to build
install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
pytest_num_workers=4,
install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
pytest_num_workers=8,
)
examples_tensorflow_job = CircleCIJob(
"examples_tensorflow",
additional_env={"OMP_NUM_THREADS": 8},
docker_image=[{"image":"huggingface/transformers-examples-tf"}],
pytest_num_workers=16,
)
hub_job = CircleCIJob(
"hub",
additional_env={"HUGGINGFACE_CO_STAGING": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}],
install_steps=[
'uv pip install .',
'uv venv && uv pip install .',
'git config --global user.email "ci@dummy.com"',
'git config --global user.name "ci"',
],
marker="is_staging_test",
pytest_num_workers=2,
resource_class="medium",
)
onnx_job = CircleCIJob(
"onnx",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
install_steps=[
"uv venv",
"uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]",
],
pytest_options={"k onnx": None},
pytest_num_workers=1,
)
exotic_models_job = CircleCIJob(
"exotic_models",
docker_image=[{"image":"huggingface/transformers-exotic-models"}],
pytest_num_workers=12,
parallelism=4,
pytest_options={"durations": 100},
)
repo_utils_job = CircleCIJob(
"repo_utils",
docker_image=[{"image":"huggingface/transformers-consistency"}],
@ -308,14 +311,13 @@ repo_utils_job = CircleCIJob(
resource_class="large",
)
non_model_job = CircleCIJob(
"non_model",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
# networkx==3.3 (after #36957) cause some issues
# TODO: remove this once it works directly
install_steps=["uv pip install .[serving]"],
marker="not generate",
parallelism=6,
pytest_num_workers=8,
)
@ -331,7 +333,7 @@ doc_test_job = CircleCIJob(
additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
install_steps=[
# Add an empty file to keep the test step running correctly even if no file is selected to be tested.
"uv pip install .",
"uv venv && pip install .",
"touch dummy.py",
command,
"cat pr_documentation_tests_temp.txt",
@ -343,14 +345,13 @@ doc_test_job = CircleCIJob(
pytest_num_workers=1,
)
REGULAR_TESTS = [torch_job, hub_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
EXAMPLES_TESTS = [examples_torch_job]
PIPELINE_TESTS = [pipelines_torch_job]
REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip
def create_circleci_config(folder=None):
if folder is None:
folder = os.getcwd()
@ -360,35 +361,19 @@ def create_circleci_config(folder=None):
if len(jobs) == 0:
jobs = [EmptyJob()]
else:
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
# Add a job waiting all the test jobs and aggregate their test summary files at the end
collection_job = EmptyJob()
collection_job.job_name = "collection_job"
jobs = [collection_job] + jobs
config = {
"version": "2.1",
"parameters": {
# Only used to accept the parameters from the trigger
"nightly": {"type": "boolean", "default": False},
# Only used to accept the parameters from GitHub Actions trigger
"GHA_Actor": {"type": "string", "default": ""},
"GHA_Action": {"type": "string", "default": ""},
"GHA_Event": {"type": "string", "default": ""},
"GHA_Meta": {"type": "string", "default": ""},
"tests_to_run": {"type": "string", "default": ""},
"tests_to_run": {"type": "string", "default": ''},
**{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
**{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
},
"jobs": {j.job_name: j.to_dict() for j in jobs}
"jobs" : {j.job_name: j.to_dict() for j in jobs},
"workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
}
if "CIRCLE_TOKEN" in os.environ:
# For private forked repo. (e.g. new model addition)
config["workflows"] = {"version": 2, "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}}
else:
# For public repo. (e.g. `transformers`)
config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
with open(os.path.join(folder, "generated_config.yml"), "w") as f:
f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>"))

View File

@ -1,6 +1,5 @@
import argparse
import re
import argparse
def parse_pytest_output(file_path):
skipped_tests = {}

View File

@ -16,7 +16,7 @@ body:
id: system-info
attributes:
label: System Info
description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
description: Please share your system info with us. You can run the command `transformers-cli env` and copy-paste its output below.
placeholder: transformers version, platform, python version, ...
validations:
required: true
@ -36,37 +36,26 @@ body:
Models:
- text models: @ArthurZucker @Cyrilvallez
- vision models: @yonigozlan @molbap
- audio models: @eustlb @ebezzam @vasqu
- multimodal models: @zucchini-nlp
- text models: @ArthurZucker
- vision models: @amyeroberts, @qubvel
- speech models: @ylacombe, @eustlb
- graph models: @clefourrier
Library:
- flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- continuous batching: @remi-or @ArthurZucker @McPatate
- pipelines: @Rocketknight1
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker and @itazap
- trainer: @SunMarc
- attention: @vasqu @ArthurZucker @CyrilVallez
- model loading (from pretrained, etc): @CyrilVallez
- distributed: @3outeille @ArthurZucker
- CIs: @ydshieh
- trainer: @muellerzr @SunMarc
Integrations:
- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization: @SunMarc @MekkCyber
- kernels: @MekkCyber @drbh
- peft: @BenjaminBossan @githubnemo
Devices/Backends:
- AMD ROCm: @ivarflakstad
- Intel XPU: @IlyasMoutawwakil
- Ascend NPU: @ivarflakstad
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
Documentation: @stevhliu
@ -74,6 +63,19 @@ body:
- for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator.
HF projects:
- accelerate: [different repo](https://github.com/huggingface/accelerate)
- datasets: [different repo](https://github.com/huggingface/datasets)
- diffusers: [different repo](https://github.com/huggingface/diffusers)
- rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
Maintained examples (not research project or legacy):
- Flax: @sanchit-gandhi
- PyTorch: See Models above and tag the person corresponding to the modality of the example.
- TensorFlow: @Rocketknight1
Research projects are not maintained and should be taken as is.
placeholder: "@Username ..."
@ -104,7 +106,6 @@ body:
label: Reproduction
description: |
Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
Please include relevant config information with your code, for example your Trainers, TRL, Peft, and DeepSpeed configs.
If you have code snippets, error messages, stack traces please provide them here as well.
Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.

View File

@ -23,7 +23,7 @@ Some notes:
* Please translate in a gender-neutral way.
* Add your translations to the folder called `<languageCode>` inside the [source folder](https://github.com/huggingface/transformers/tree/main/docs/source).
* Register your translation in `<languageCode>/_toctree.yml`; please follow the order of the [English version](https://github.com/huggingface/transformers/blob/main/docs/source/en/_toctree.yml).
* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @stevhliu for review.
* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @stevhliu and @MKhalusova for review.
* 🙋 If you'd like others to help you with the translation, you can also post in the 🤗 [forums](https://discuss.huggingface.co/).
## Get Started section

View File

@ -6,7 +6,7 @@ body:
id: system-info
attributes:
label: System Info
description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
description: Please share your system info with us. You can run the command `transformers-cli env` and copy-paste its output below.
render: shell
placeholder: transformers version, platform, python version, ...
validations:

View File

@ -39,40 +39,41 @@ members/contributors who may be interested in your PR.
Models:
- text models: @ArthurZucker @Cyrilvallez
- vision models: @yonigozlan @molbap
- audio models: @eustlb @ebezzam @vasqu
- multimodal models: @zucchini-nlp
- text models: @ArthurZucker
- vision models: @amyeroberts, @qubvel
- speech models: @ylacombe, @eustlb
- graph models: @clefourrier
Library:
- flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- continuous batching: @remi-or @ArthurZucker @McPatate
- pipelines: @Rocketknight1
- tokenizers: @ArthurZucker and @itazap
- trainer: @SunMarc
- attention: @vasqu @ArthurZucker @CyrilVallez
- model loading (from pretrained, etc): @CyrilVallez
- distributed: @3outeille @ArthurZucker
- CIs: @ydshieh
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- trainer: @muellerzr and @SunMarc
- chat templates: @Rocketknight1
Integrations:
- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization: @SunMarc @MekkCyber
- kernels: @MekkCyber @drbh
- peft: @BenjaminBossan @githubnemo
Devices/Backends:
- AMD ROCm: @ivarflakstad
- Intel XPU: @IlyasMoutawwakil
- Ascend NPU: @ivarflakstad
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
Documentation: @stevhliu
Research projects are not maintained and should be taken as is.
HF projects:
- accelerate: [different repo](https://github.com/huggingface/accelerate)
- datasets: [different repo](https://github.com/huggingface/datasets)
- diffusers: [different repo](https://github.com/huggingface/diffusers)
- rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
Maintained examples (not research project or legacy):
- Flax: @sanchit-gandhi
- PyTorch: See Models above and tag the person corresponding to the modality of the example.
- TensorFlow: @Rocketknight1
-->

View File

@ -1,39 +0,0 @@
# copilot-instructions.md Guide for Hugging Face Transformers
This copilot-instructions.md file provides guidance for code agents working with this codebase.
## Core Project Structure
- `/src/transformers`: This contains the core source code for the library
- `/models`: Code for individual models. Models inherit from base classes in the root `/src/transformers` directory.
- `/tests`: This contains the core test classes for the library. These are usually inherited rather than directly run.
- `/models`: Tests for individual models. Model tests inherit from common tests in the root `/tests` directory.
- `/docs`: This contains the documentation for the library, including guides, tutorials, and API references.
## Coding Conventions for Hugging Face Transformers
- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
## Copying and inheritance
Many models in the codebase have similar code, but it is not shared by inheritance because we want each model file to be self-contained.
We use two mechanisms to keep this code in sync:
- "Copied from" syntax. Functions or entire classes can have a comment at the top like this: `# Copied from transformers.models.llama.modeling_llama.rotate_half` or `# Copied from transformers.models.t5.modeling_t5.T5LayerNorm with T5->MT5`
These comments are actively checked by the style tools, and copies will automatically be updated when the base code is updated. If you need to update a copied function, you should
either update the base function and use `make fixup` to propagate the change to all copies, or simply remove the `# Copied from` comment if that is inappropriate.
- "Modular" files. These files briefly define models by composing them using inheritance from other models. They are not meant to be used directly. Instead, the style tools
automatically generate a complete modeling file, like `modeling_bert.py`, from the modular file like `modular_bert.py`. If a model has a modular file, the modeling file
should never be edited directly! Instead, changes should be made in the modular file, and then you should run `make fixup` to update the modeling file automatically.
When adding new models, you should prefer `modular` style and inherit as many classes as possible from existing models.
## Testing
After making changes, you should usually run `make fixup` to ensure any copies and modular files are updated, and then test all affected models. This includes both
the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`
If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.

View File

@ -1,122 +0,0 @@
# coding=utf-8
# Copyright 2025 the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import re
from collections import Counter
from pathlib import Path
import github
from github import Github
def pattern_to_regex(pattern):
    """
    Translate a CODEOWNERS-style glob into a regular expression string.

    Args:
        pattern (`str`):
            Glob to translate. A leading `/` anchors the match to the start of the path, and `*` matches any run
            of non-slash characters. Every other character is matched literally.

    Returns:
        `str`: Regex source meant to be used with `re.search` against a file path.
    """
    anchored = pattern.startswith("/")
    # The leading slash is only an anchor marker, so it is dropped before escaping.
    escaped = re.escape(pattern[1:] if anchored else pattern)
    # `re.escape` has escaped every `*`; swap each one for "any number of non-slash characters".
    regex = escaped.replace(r"\*", "[^/]*")
    if anchored:
        # Allow an optional leading slash after the start of the string
        regex = r"^\/?" + regex
    return regex
def get_file_owners(file_path, codeowners_lines):
    """
    Return the owners listed on the last codeowners rule whose pattern matches `file_path`.

    Args:
        file_path (`str`):
            Path of the file to look up.
        codeowners_lines (`list[str]`):
            Raw lines of the codeowners file, in file order.

    Returns:
        `list[str]`: Owner handles with the leading `@` removed. The list is empty when the matching rule names
        no owner, or when no rule matches at all.
    """
    # Process lines in reverse (last matching pattern takes precedence)
    for raw_line in reversed(codeowners_lines):
        # Skip comments and empty lines, strip inline comments
        rule = raw_line.split("#")[0].strip()
        if not rule:
            continue

        # First token is the pattern, the remaining tokens are the owners
        pattern, *handles = rule.split()

        if re.search(pattern_to_regex(pattern), file_path) is None:
            continue

        # Can be empty, e.g. for dummy files with explicitly no owner!
        return [handle.removeprefix("@") for handle in handles]

    return []  # Should never happen, but just in case
def pr_author_is_in_hf(pr_author, codeowners_lines):
    """
    Tell whether `pr_author` is named as an owner on any rule of the codeowners file.

    Args:
        pr_author (`str`):
            Login of the pull request author, without a leading `@`.
        codeowners_lines (`list[str]`):
            Raw lines of the codeowners file.

    Returns:
        `bool`: `True` if at least one rule lists the author as an owner, `False` otherwise.
    """
    for raw_line in codeowners_lines:
        # Drop inline comments; lines that are blank or comment-only carry no rule
        rule = raw_line.split("#")[0].strip()
        if not rule:
            continue

        # First token is the pattern (not needed here), the remaining tokens are the owners
        _pattern, *handles = rule.split()
        if pr_author in [handle.removeprefix("@") for handle in handles]:
            return True

    return False
def main():
    """
    Request reviews on the pull request described by the GitHub event payload.

    Reads the `codeowners_for_review_action` file located next to this script, the `GITHUB_TOKEN` and
    `GITHUB_EVENT_PATH` environment variables, and the `huggingface/transformers` repository through the GitHub
    API. Nothing is requested when the author is listed in the codeowners file, when the pull request already has
    reviews or requested reviewers, or when no owner matches the changed files. Otherwise the (at most) two
    owners with the largest number of changed lines are asked for a review.
    """
    script_dir = Path(__file__).parent.absolute()
    with open(script_dir / "codeowners_for_review_action") as f:
        codeowners_lines = f.readlines()

    g = Github(os.environ['GITHUB_TOKEN'])
    repo = g.get_repo("huggingface/transformers")
    with open(os.environ['GITHUB_EVENT_PATH']) as f:
        event = json.load(f)

    # The PR number is available in the event payload
    pr = repo.get_pull(event['pull_request']['number'])
    pr_author = pr.user.login

    if pr_author_is_in_hf(pr_author, codeowners_lines):
        print(f"PR author {pr_author} is in codeowners, skipping review request.")
        return

    existing_reviews = list(pr.get_reviews())
    if existing_reviews:
        print(f"Already has reviews: {[r.user.login for r in existing_reviews]}")
        return

    # Only user requests are considered; team requests are ignored
    users_requested, _teams_requested = pr.get_review_requests()
    users_requested = list(users_requested)
    if users_requested:
        print(f"Reviewers already requested: {users_requested}")
        return

    # Credit every changed file's line count to each of its owners
    locs_per_owner = Counter()
    for changed_file in pr.get_files():
        for owner in get_file_owners(changed_file.filename, codeowners_lines):
            locs_per_owner[owner] += changed_file.changes

    # Assign the top 2 based on locs changed as reviewers, but skip the owner if present
    locs_per_owner.pop(pr_author, None)
    top_owners = locs_per_owner.most_common(2)
    print("Top owners", top_owners)
    top_owners = [owner for owner, _locs in top_owners]

    if not top_owners:
        # Nobody to ask: avoid sending a review request with an empty reviewer list
        print("No matching code owners found, skipping review request.")
        return

    try:
        pr.create_review_request(top_owners)
    except github.GithubException as e:
        # Best effort: a failed request is reported but does not fail the job
        print(f"Failed to request review for {top_owners}: {e}")
if __name__ == "__main__":
main()

View File

@ -1,370 +0,0 @@
# Top-level rules are matched only if nothing else matches
* @Rocketknight1 @ArthurZucker # if no one is pinged based on the other rules, they will do the dispatch
*.md @stevhliu
*tokenization* @ArthurZucker
docs/ @stevhliu
/benchmark/ @McPatate
/docker/ @ydshieh @ArthurZucker
# More high-level globs catch cases when specific rules later don't apply
/src/transformers/models/*/processing* @molbap @yonigozlan
/src/transformers/models/*/image_processing* @yonigozlan
/src/transformers/models/*/image_processing_*_fast* @yonigozlan
# Owners of subsections of the library
/src/transformers/generation/ @gante
/src/transformers/pipeline/ @Rocketknight1 @yonigozlan
/src/transformers/integrations/ @SunMarc @MekkCyber @zach-huggingface
/src/transformers/quantizers/ @SunMarc @MekkCyber
tests/ @ydshieh
tests/generation/ @gante
/src/transformers/models/auto/ @ArthurZucker
/src/transformers/utils/ @ArthurZucker @Rocketknight1
/src/transformers/loss/ @ArthurZucker
/src/transformers/onnx/ @michaelbenayoun
# Specific files come after the sections/globs, so they take priority
/.circleci/config.yml @ArthurZucker @ydshieh
/utils/tests_fetcher.py @ydshieh
trainer.py @zach-huggingface @SunMarc
trainer_utils.py @zach-huggingface @SunMarc
/utils/modular_model_converter.py @Cyrilvallez @ArthurZucker
# Owners of individual models are specific / high priority, and so they come last
# mod* captures modeling and modular files
# Text models
/src/transformers/models/albert/mod*_albert* @ArthurZucker
/src/transformers/models/bamba/mod*_bamba* @ArthurZucker
/src/transformers/models/bart/mod*_bart* @ArthurZucker
/src/transformers/models/barthez/mod*_barthez* @ArthurZucker
/src/transformers/models/bartpho/mod*_bartpho* @ArthurZucker
/src/transformers/models/bert/mod*_bert* @ArthurZucker
/src/transformers/models/bert_generation/mod*_bert_generation* @ArthurZucker
/src/transformers/models/bert_japanese/mod*_bert_japanese* @ArthurZucker
/src/transformers/models/bertweet/mod*_bertweet* @ArthurZucker
/src/transformers/models/big_bird/mod*_big_bird* @ArthurZucker
/src/transformers/models/bigbird_pegasus/mod*_bigbird_pegasus* @ArthurZucker
/src/transformers/models/biogpt/mod*_biogpt* @ArthurZucker
/src/transformers/models/blenderbot/mod*_blenderbot* @ArthurZucker
/src/transformers/models/blenderbot_small/mod*_blenderbot_small* @ArthurZucker
/src/transformers/models/bloom/mod*_bloom* @ArthurZucker
/src/transformers/models/bort/mod*_bort* @ArthurZucker
/src/transformers/models/byt5/mod*_byt5* @ArthurZucker
/src/transformers/models/camembert/mod*_camembert* @ArthurZucker
/src/transformers/models/canine/mod*_canine* @ArthurZucker
/src/transformers/models/codegen/mod*_codegen* @ArthurZucker
/src/transformers/models/code_llama/mod*_code_llama* @ArthurZucker
/src/transformers/models/cohere/mod*_cohere* @ArthurZucker
/src/transformers/models/cohere2/mod*_cohere2* @ArthurZucker
/src/transformers/models/convbert/mod*_convbert* @ArthurZucker
/src/transformers/models/cpm/mod*_cpm* @ArthurZucker
/src/transformers/models/cpmant/mod*_cpmant* @ArthurZucker
/src/transformers/models/ctrl/mod*_ctrl* @ArthurZucker
/src/transformers/models/dbrx/mod*_dbrx* @ArthurZucker
/src/transformers/models/deberta/mod*_deberta* @ArthurZucker
/src/transformers/models/deberta_v2/mod*_deberta_v2* @ArthurZucker
/src/transformers/models/dialogpt/mod*_dialogpt* @ArthurZucker
/src/transformers/models/diffllama/mod*_diffllama* @ArthurZucker
/src/transformers/models/distilbert/mod*_distilbert* @ArthurZucker
/src/transformers/models/dpr/mod*_dpr* @ArthurZucker
/src/transformers/models/electra/mod*_electra* @ArthurZucker
/src/transformers/models/encoder_decoder/mod*_encoder_decoder* @ArthurZucker
/src/transformers/models/ernie/mod*_ernie* @ArthurZucker
/src/transformers/models/ernie_m/mod*_ernie_m* @ArthurZucker
/src/transformers/models/esm/mod*_esm* @ArthurZucker
/src/transformers/models/falcon/mod*_falcon* @ArthurZucker
/src/transformers/models/falcon3/mod*_falcon3* @ArthurZucker
/src/transformers/models/falcon_mamba/mod*_falcon_mamba* @ArthurZucker
/src/transformers/models/fastspeech2_conformer/mod*_fastspeech2_conformer* @ArthurZucker
/src/transformers/models/flan_t5/mod*_flan_t5* @ArthurZucker
/src/transformers/models/flan_ul2/mod*_flan_ul2* @ArthurZucker
/src/transformers/models/flaubert/mod*_flaubert* @ArthurZucker
/src/transformers/models/fnet/mod*_fnet* @ArthurZucker
/src/transformers/models/fsmt/mod*_fsmt* @ArthurZucker
/src/transformers/models/funnel/mod*_funnel* @ArthurZucker
/src/transformers/models/fuyu/mod*_fuyu* @ArthurZucker
/src/transformers/models/gemma/mod*_gemma* @ArthurZucker
/src/transformers/models/gemma2/mod*_gemma2* @ArthurZucker
/src/transformers/models/glm/mod*_glm* @ArthurZucker
/src/transformers/models/openai_gpt/mod*_openai_gpt* @ArthurZucker
/src/transformers/models/gpt_neo/mod*_gpt_neo* @ArthurZucker
/src/transformers/models/gpt_neox/mod*_gpt_neox* @ArthurZucker
/src/transformers/models/gpt_neox_japanese/mod*_gpt_neox_japanese* @ArthurZucker
/src/transformers/models/gptj/mod*_gptj* @ArthurZucker
/src/transformers/models/gpt2/mod*_gpt2* @ArthurZucker
/src/transformers/models/gpt_bigcode/mod*_gpt_bigcode* @ArthurZucker
/src/transformers/models/gptsan_japanese/mod*_gptsan_japanese* @ArthurZucker
/src/transformers/models/gpt_sw3/mod*_gpt_sw3* @ArthurZucker
/src/transformers/models/granite/mod*_granite* @ArthurZucker
/src/transformers/models/granitemoe/mod*_granitemoe* @ArthurZucker
/src/transformers/models/herbert/mod*_herbert* @ArthurZucker
/src/transformers/models/ibert/mod*_ibert* @ArthurZucker
/src/transformers/models/jamba/mod*_jamba* @ArthurZucker
/src/transformers/models/jetmoe/mod*_jetmoe* @ArthurZucker
/src/transformers/models/jukebox/mod*_jukebox* @ArthurZucker
/src/transformers/models/led/mod*_led* @ArthurZucker
/src/transformers/models/llama/mod*_llama* @ArthurZucker @Cyrilvallez
/src/transformers/models/longformer/mod*_longformer* @ArthurZucker
/src/transformers/models/longt5/mod*_longt5* @ArthurZucker
/src/transformers/models/luke/mod*_luke* @ArthurZucker
/src/transformers/models/m2m_100/mod*_m2m_100* @ArthurZucker
/src/transformers/models/madlad_400/mod*_madlad_400* @ArthurZucker
/src/transformers/models/mamba/mod*_mamba* @ArthurZucker
/src/transformers/models/mamba2/mod*_mamba2* @ArthurZucker
/src/transformers/models/marian/mod*_marian* @ArthurZucker
/src/transformers/models/markuplm/mod*_markuplm* @ArthurZucker
/src/transformers/models/mbart/mod*_mbart* @ArthurZucker
/src/transformers/models/mega/mod*_mega* @ArthurZucker
/src/transformers/models/megatron_bert/mod*_megatron_bert* @ArthurZucker
/src/transformers/models/megatron_gpt2/mod*_megatron_gpt2* @ArthurZucker
/src/transformers/models/mistral/mod*_mistral* @ArthurZucker
/src/transformers/models/mixtral/mod*_mixtral* @ArthurZucker
/src/transformers/models/mluke/mod*_mluke* @ArthurZucker
/src/transformers/models/mobilebert/mod*_mobilebert* @ArthurZucker
/src/transformers/models/modernbert/mod*_modernbert* @ArthurZucker
/src/transformers/models/mpnet/mod*_mpnet* @ArthurZucker
/src/transformers/models/mpt/mod*_mpt* @ArthurZucker
/src/transformers/models/mra/mod*_mra* @ArthurZucker
/src/transformers/models/mt5/mod*_mt5* @ArthurZucker
/src/transformers/models/mvp/mod*_mvp* @ArthurZucker
/src/transformers/models/myt5/mod*_myt5* @ArthurZucker
/src/transformers/models/nemotron/mod*_nemotron* @ArthurZucker
/src/transformers/models/nezha/mod*_nezha* @ArthurZucker
/src/transformers/models/nllb/mod*_nllb* @ArthurZucker
/src/transformers/models/nllb_moe/mod*_nllb_moe* @ArthurZucker
/src/transformers/models/nystromformer/mod*_nystromformer* @ArthurZucker
/src/transformers/models/olmo/mod*_olmo* @ArthurZucker
/src/transformers/models/olmo2/mod*_olmo2* @ArthurZucker
/src/transformers/models/olmoe/mod*_olmoe* @ArthurZucker
/src/transformers/models/open_llama/mod*_open_llama* @ArthurZucker
/src/transformers/models/opt/mod*_opt* @ArthurZucker
/src/transformers/models/pegasus/mod*_pegasus* @ArthurZucker
/src/transformers/models/pegasus_x/mod*_pegasus_x* @ArthurZucker
/src/transformers/models/persimmon/mod*_persimmon* @ArthurZucker
/src/transformers/models/phi/mod*_phi* @ArthurZucker
/src/transformers/models/phi3/mod*_phi3* @ArthurZucker
/src/transformers/models/phimoe/mod*_phimoe* @ArthurZucker
/src/transformers/models/phobert/mod*_phobert* @ArthurZucker
/src/transformers/models/plbart/mod*_plbart* @ArthurZucker
/src/transformers/models/prophetnet/mod*_prophetnet* @ArthurZucker
/src/transformers/models/qdqbert/mod*_qdqbert* @ArthurZucker
/src/transformers/models/qwen2/mod*_qwen2* @ArthurZucker
/src/transformers/models/qwen2_moe/mod*_qwen2_moe* @ArthurZucker
/src/transformers/models/rag/mod*_rag* @ArthurZucker
/src/transformers/models/realm/mod*_realm* @ArthurZucker
/src/transformers/models/recurrent_gemma/mod*_recurrent_gemma* @ArthurZucker
/src/transformers/models/reformer/mod*_reformer* @ArthurZucker
/src/transformers/models/rembert/mod*_rembert* @ArthurZucker
/src/transformers/models/retribert/mod*_retribert* @ArthurZucker
/src/transformers/models/roberta/mod*_roberta* @ArthurZucker
/src/transformers/models/roberta_prelayernorm/mod*_roberta_prelayernorm* @ArthurZucker
/src/transformers/models/roc_bert/mod*_roc_bert* @ArthurZucker
/src/transformers/models/roformer/mod*_roformer* @ArthurZucker
/src/transformers/models/rwkv/mod*_rwkv* @ArthurZucker
/src/transformers/models/splinter/mod*_splinter* @ArthurZucker
/src/transformers/models/squeezebert/mod*_squeezebert* @ArthurZucker
/src/transformers/models/stablelm/mod*_stablelm* @ArthurZucker
/src/transformers/models/starcoder2/mod*_starcoder2* @ArthurZucker
/src/transformers/models/switch_transformers/mod*_switch_transformers* @ArthurZucker
/src/transformers/models/t5/mod*_t5* @ArthurZucker
/src/transformers/models/t5v1.1/mod*_t5v1.1* @ArthurZucker
/src/transformers/models/tapex/mod*_tapex* @ArthurZucker
/src/transformers/models/transfo_xl/mod*_transfo_xl* @ArthurZucker
/src/transformers/models/ul2/mod*_ul2* @ArthurZucker
/src/transformers/models/umt5/mod*_umt5* @ArthurZucker
/src/transformers/models/xmod/mod*_xmod* @ArthurZucker
/src/transformers/models/xglm/mod*_xglm* @ArthurZucker
/src/transformers/models/xlm/mod*_xlm* @ArthurZucker
/src/transformers/models/xlm_prophetnet/mod*_xlm_prophetnet* @ArthurZucker
/src/transformers/models/xlm_roberta/mod*_xlm_roberta* @ArthurZucker
/src/transformers/models/xlm_roberta_xl/mod*_xlm_roberta_xl* @ArthurZucker
/src/transformers/models/xlm_v/mod*_xlm_v* @ArthurZucker
/src/transformers/models/xlnet/mod*_xlnet* @ArthurZucker
/src/transformers/models/yoso/mod*_yoso* @ArthurZucker
/src/transformers/models/zamba/mod*_zamba* @ArthurZucker
# Vision models
/src/transformers/models/beit/mod*_beit* @yonigozlan @molbap
/src/transformers/models/bit/mod*_bit* @yonigozlan @molbap
/src/transformers/models/conditional_detr/mod*_conditional_detr* @yonigozlan @molbap
/src/transformers/models/convnext/mod*_convnext* @yonigozlan @molbap
/src/transformers/models/convnextv2/mod*_convnextv2* @yonigozlan @molbap
/src/transformers/models/cvt/mod*_cvt* @yonigozlan @molbap
/src/transformers/models/deformable_detr/mod*_deformable_detr* @yonigozlan @molbap
/src/transformers/models/deit/mod*_deit* @yonigozlan @molbap
/src/transformers/models/depth_anything/mod*_depth_anything* @yonigozlan @molbap
/src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @yonigozlan @molbap
/src/transformers/models/deta/mod*_deta* @yonigozlan @molbap
/src/transformers/models/detr/mod*_detr* @yonigozlan @molbap
/src/transformers/models/dinat/mod*_dinat* @yonigozlan @molbap
/src/transformers/models/dinov2/mod*_dinov2* @yonigozlan @molbap
/src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @yonigozlan @molbap
/src/transformers/models/dit/mod*_dit* @yonigozlan @molbap
/src/transformers/models/dpt/mod*_dpt* @yonigozlan @molbap
/src/transformers/models/efficientformer/mod*_efficientformer* @yonigozlan @molbap
/src/transformers/models/efficientnet/mod*_efficientnet* @yonigozlan @molbap
/src/transformers/models/focalnet/mod*_focalnet* @yonigozlan @molbap
/src/transformers/models/glpn/mod*_glpn* @yonigozlan @molbap
/src/transformers/models/hiera/mod*_hiera* @yonigozlan @molbap
/src/transformers/models/ijepa/mod*_ijepa* @yonigozlan @molbap
/src/transformers/models/imagegpt/mod*_imagegpt* @yonigozlan @molbap
/src/transformers/models/levit/mod*_levit* @yonigozlan @molbap
/src/transformers/models/mask2former/mod*_mask2former* @yonigozlan @molbap
/src/transformers/models/maskformer/mod*_maskformer* @yonigozlan @molbap
/src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @yonigozlan @molbap
/src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @yonigozlan @molbap
/src/transformers/models/mobilevit/mod*_mobilevit* @yonigozlan @molbap
/src/transformers/models/mobilevitv2/mod*_mobilevitv2* @yonigozlan @molbap
/src/transformers/models/nat/mod*_nat* @yonigozlan @molbap
/src/transformers/models/poolformer/mod*_poolformer* @yonigozlan @molbap
/src/transformers/models/pvt/mod*_pvt* @yonigozlan @molbap
/src/transformers/models/pvt_v2/mod*_pvt_v2* @yonigozlan @molbap
/src/transformers/models/regnet/mod*_regnet* @yonigozlan @molbap
/src/transformers/models/resnet/mod*_resnet* @yonigozlan @molbap
/src/transformers/models/rt_detr/mod*_rt_detr* @yonigozlan @molbap
/src/transformers/models/segformer/mod*_segformer* @yonigozlan @molbap
/src/transformers/models/seggpt/mod*_seggpt* @yonigozlan @molbap
/src/transformers/models/superpoint/mod*_superpoint* @yonigozlan @molbap
/src/transformers/models/swiftformer/mod*_swiftformer* @yonigozlan @molbap
/src/transformers/models/swin/mod*_swin* @yonigozlan @molbap
/src/transformers/models/swinv2/mod*_swinv2* @yonigozlan @molbap
/src/transformers/models/swin2sr/mod*_swin2sr* @yonigozlan @molbap
/src/transformers/models/table_transformer/mod*_table_transformer* @yonigozlan @molbap
/src/transformers/models/textnet/mod*_textnet* @yonigozlan @molbap
/src/transformers/models/timm_wrapper/mod*_timm_wrapper* @yonigozlan @molbap
/src/transformers/models/upernet/mod*_upernet* @yonigozlan @molbap
/src/transformers/models/van/mod*_van* @yonigozlan @molbap
/src/transformers/models/vit/mod*_vit* @yonigozlan @molbap
/src/transformers/models/vit_hybrid/mod*_vit_hybrid* @yonigozlan @molbap
/src/transformers/models/vitdet/mod*_vitdet* @yonigozlan @molbap
/src/transformers/models/vit_mae/mod*_vit_mae* @yonigozlan @molbap
/src/transformers/models/vitmatte/mod*_vitmatte* @yonigozlan @molbap
/src/transformers/models/vit_msn/mod*_vit_msn* @yonigozlan @molbap
/src/transformers/models/vitpose/mod*_vitpose* @yonigozlan @molbap
/src/transformers/models/yolos/mod*_yolos* @yonigozlan @molbap
/src/transformers/models/zoedepth/mod*_zoedepth* @yonigozlan @molbap
# Audio models
/src/transformers/models/audio_spectrogram_transformer/mod*_audio_spectrogram_transformer* @eustlb
/src/transformers/models/bark/mod*_bark* @eustlb
/src/transformers/models/clap/mod*_clap* @eustlb
/src/transformers/models/dac/mod*_dac* @eustlb
/src/transformers/models/encodec/mod*_encodec* @eustlb
/src/transformers/models/hubert/mod*_hubert* @eustlb
/src/transformers/models/mctct/mod*_mctct* @eustlb
/src/transformers/models/mimi/mod*_mimi* @eustlb
/src/transformers/models/mms/mod*_mms* @eustlb
/src/transformers/models/moshi/mod*_moshi* @eustlb
/src/transformers/models/musicgen/mod*_musicgen* @eustlb
/src/transformers/models/musicgen_melody/mod*_musicgen_melody* @eustlb
/src/transformers/models/pop2piano/mod*_pop2piano* @eustlb
/src/transformers/models/seamless_m4t/mod*_seamless_m4t* @eustlb
/src/transformers/models/seamless_m4t_v2/mod*_seamless_m4t_v2* @eustlb
/src/transformers/models/sew/mod*_sew* @eustlb
/src/transformers/models/sew_d/mod*_sew_d* @eustlb
/src/transformers/models/speech_to_text/mod*_speech_to_text* @eustlb
/src/transformers/models/speech_to_text_2/mod*_speech_to_text_2* @eustlb
/src/transformers/models/speecht5/mod*_speecht5* @eustlb
/src/transformers/models/unispeech/mod*_unispeech* @eustlb
/src/transformers/models/unispeech_sat/mod*_unispeech_sat* @eustlb
/src/transformers/models/univnet/mod*_univnet* @eustlb
/src/transformers/models/vits/mod*_vits* @eustlb
/src/transformers/models/wav2vec2/mod*_wav2vec2* @eustlb
/src/transformers/models/wav2vec2_bert/mod*_wav2vec2_bert* @eustlb
/src/transformers/models/wav2vec2_conformer/mod*_wav2vec2_conformer* @eustlb
/src/transformers/models/wav2vec2_phoneme/mod*_wav2vec2_phoneme* @eustlb
/src/transformers/models/wavlm/mod*_wavlm* @eustlb
/src/transformers/models/whisper/mod*_whisper* @eustlb
/src/transformers/models/xls_r/mod*_xls_r* @eustlb
/src/transformers/models/xlsr_wav2vec2/mod*_xlsr_wav2vec2* @eustlb
# Video models
/src/transformers/models/timesformer/mod*_timesformer* @Rocketknight1
/src/transformers/models/videomae/mod*_videomae* @Rocketknight1
/src/transformers/models/vivit/mod*_vivit* @Rocketknight1
# Multimodal models
/src/transformers/models/align/mod*_align* @zucchini-nlp
/src/transformers/models/altclip/mod*_altclip* @zucchini-nlp
/src/transformers/models/aria/mod*_aria* @zucchini-nlp
/src/transformers/models/blip/mod*_blip* @zucchini-nlp
/src/transformers/models/blip_2/mod*_blip_2* @zucchini-nlp
/src/transformers/models/bridgetower/mod*_bridgetower* @zucchini-nlp
/src/transformers/models/bros/mod*_bros* @zucchini-nlp
/src/transformers/models/chameleon/mod*_chameleon* @zucchini-nlp
/src/transformers/models/chinese_clip/mod*_chinese_clip* @zucchini-nlp
/src/transformers/models/clip/mod*_clip* @zucchini-nlp
/src/transformers/models/clipseg/mod*_clipseg* @zucchini-nlp
/src/transformers/models/clvp/mod*_clvp* @zucchini-nlp
/src/transformers/models/colpali/mod*_colpali* @zucchini-nlp @yonigozlan
/src/transformers/models/data2vec/mod*_data2vec* @zucchini-nlp
/src/transformers/models/deplot/mod*_deplot* @zucchini-nlp
/src/transformers/models/donut/mod*_donut* @zucchini-nlp
/src/transformers/models/flava/mod*_flava* @zucchini-nlp
/src/transformers/models/git/mod*_git* @zucchini-nlp
/src/transformers/models/grounding_dino/mod*_grounding_dino* @yonigozlan
/src/transformers/models/groupvit/mod*_groupvit* @zucchini-nlp
/src/transformers/models/idefics/mod*_idefics* @zucchini-nlp
/src/transformers/models/idefics2/mod*_idefics2* @zucchini-nlp
/src/transformers/models/idefics3/mod*_idefics3* @zucchini-nlp
/src/transformers/models/instructblip/mod*_instructblip* @zucchini-nlp
/src/transformers/models/instructblipvideo/mod*_instructblipvideo* @zucchini-nlp
/src/transformers/models/kosmos_2/mod*_kosmos_2* @zucchini-nlp
/src/transformers/models/layoutlm/mod*_layoutlm* @NielsRogge
/src/transformers/models/layoutlmv2/mod*_layoutlmv2* @NielsRogge
/src/transformers/models/layoutlmv3/mod*_layoutlmv3* @NielsRogge
/src/transformers/models/layoutxlm/mod*_layoutxlm* @NielsRogge
/src/transformers/models/lilt/mod*_lilt* @zucchini-nlp
/src/transformers/models/llava/mod*_llava* @zucchini-nlp @ArthurZucker
/src/transformers/models/llava_next/mod*_llava_next* @zucchini-nlp
/src/transformers/models/llava_next_video/mod*_llava_next_video* @zucchini-nlp
/src/transformers/models/llava_onevision/mod*_llava_onevision* @zucchini-nlp
/src/transformers/models/lxmert/mod*_lxmert* @zucchini-nlp
/src/transformers/models/matcha/mod*_matcha* @zucchini-nlp
/src/transformers/models/mgp_str/mod*_mgp_str* @zucchini-nlp
/src/transformers/models/mllama/mod*_mllama* @zucchini-nlp
/src/transformers/models/nougat/mod*_nougat* @NielsRogge
/src/transformers/models/omdet_turbo/mod*_omdet_turbo* @yonigozlan
/src/transformers/models/oneformer/mod*_oneformer* @zucchini-nlp
/src/transformers/models/owlvit/mod*_owlvit* @yonigozlan
/src/transformers/models/owlv2/mod*_owlv2* @yonigozlan
/src/transformers/models/paligemma/mod*_paligemma* @zucchini-nlp @molbap
/src/transformers/models/perceiver/mod*_perceiver* @zucchini-nlp
/src/transformers/models/pix2struct/mod*_pix2struct* @zucchini-nlp
/src/transformers/models/pixtral/mod*_pixtral* @zucchini-nlp @ArthurZucker
/src/transformers/models/qwen2_audio/mod*_qwen2_audio* @zucchini-nlp @ArthurZucker
/src/transformers/models/qwen2_vl/mod*_qwen2_vl* @zucchini-nlp @ArthurZucker
/src/transformers/models/sam/mod*_sam* @zucchini-nlp @ArthurZucker
/src/transformers/models/siglip/mod*_siglip* @zucchini-nlp
/src/transformers/models/speech_encoder_decoder/mod*_speech_encoder_decoder* @zucchini-nlp
/src/transformers/models/tapas/mod*_tapas* @NielsRogge
/src/transformers/models/trocr/mod*_trocr* @zucchini-nlp
/src/transformers/models/tvlt/mod*_tvlt* @zucchini-nlp
/src/transformers/models/tvp/mod*_tvp* @zucchini-nlp
/src/transformers/models/udop/mod*_udop* @zucchini-nlp
/src/transformers/models/video_llava/mod*_video_llava* @zucchini-nlp
/src/transformers/models/vilt/mod*_vilt* @zucchini-nlp
/src/transformers/models/vipllava/mod*_vipllava* @zucchini-nlp
/src/transformers/models/vision_encoder_decoder/mod*_vision_encoder_decoder* @Rocketknight1
/src/transformers/models/vision_text_dual_encoder/mod*_vision_text_dual_encoder* @Rocketknight1
/src/transformers/models/visual_bert/mod*_visual_bert* @zucchini-nlp
/src/transformers/models/xclip/mod*_xclip* @zucchini-nlp
# Reinforcement learning models
/src/transformers/models/decision_transformer/mod*_decision_transformer* @Rocketknight1
/src/transformers/models/trajectory_transformer/mod*_trajectory_transformer* @Rocketknight1
# Time series models
/src/transformers/models/autoformer/mod*_autoformer* @Rocketknight1
/src/transformers/models/informer/mod*_informer* @Rocketknight1
/src/transformers/models/patchtsmixer/mod*_patchtsmixer* @Rocketknight1
/src/transformers/models/patchtst/mod*_patchtst* @Rocketknight1
/src/transformers/models/time_series_transformer/mod*_time_series_transformer* @Rocketknight1
# Graph models
/src/transformers/models/graphormer/mod*_graphormer* @clefourrier
# Finally, files with no owners that shouldn't generate pings, usually automatically generated and checked in the CI
utils/dummy*

View File

@ -54,7 +54,7 @@ jobs:
- name: Create model files
run: |
. ~/venv/bin/activate
transformers add-new-model-like --config_file tests/fixtures/add_distilbert_like_config.json --path_to_repo .
transformers-cli add-new-model-like --config_file tests/fixtures/add_distilbert_like_config.json --path_to_repo .
make style
make fix-copies

View File

@ -1,26 +0,0 @@
# Runs the reviewer-assignment script when a pull request targeting `main`
# is marked ready for review.
name: Assign PR Reviewers

on:
  pull_request_target:
    branches:
      - main
    types:
      - ready_for_review

jobs:
  assign_reviewers:
    # The script requests reviewers, so it needs write access to pull requests.
    permissions:
      pull-requests: write
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string.
          python-version: '3.13'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install PyGithub

      - name: Run assignment script
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: python .github/scripts/assign_reviewers.py

View File

@ -12,16 +12,13 @@ concurrency:
env:
HF_HOME: /mnt/cache
DATASET_ID: hf-benchmarks/transformers
MODEL_ID: meta-llama/Llama-3.1-8B-Instruct
jobs:
benchmark:
name: Benchmark
strategy:
matrix:
# group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled)
group: [aws-g5-4xlarge-cache]
group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus]
runs-on:
group: ${{ matrix.group }}
if: |
@ -36,12 +33,26 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha || github.sha }}
- name: Install libpq-dev & psql
run: |
apt update
apt install -y libpq-dev postgresql-client
- name: Install benchmark script dependencies
run: python3 -m pip install -r benchmark_v2/requirements.txt kernels
run: python3 -m pip install -r benchmark/requirements.txt
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]" && python3 -m pip uninstall -y torchvision # temp fix
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]"
- name: Run database init script
run: |
psql -f benchmark/init_db.sql
env:
PGDATABASE: metrics
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
PGUSER: transformers_benchmarks
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
- name: Run benchmark
run: |
@ -52,11 +63,12 @@ jobs:
commit_id=$GITHUB_SHA
fi
commit_msg=$(git show -s --format=%s | cut -c1-70)
python3 benchmark_v2/run_benchmarks.py -b 32 -s 128 -n 256 --branch-name "$BRANCH_NAME" --commit-id "$commit_id" --commit-message "$commit_msg" --model-id "$MODEL_ID" --log-level INFO --push-result-to-dataset "$DATASET_ID"
python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
PUSH_TO_HUB_TOKEN: ${{ secrets.PUSH_TO_HUB_TOKEN }}
# Enable this to see debug logs
# HF_HUB_VERBOSITY: debug
# TRANSFORMERS_VERBOSITY: debug
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
PGUSER: transformers_benchmarks
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}

View File

@ -1,57 +0,0 @@
# Runs the benchmark_v2 suite inside a GPU container and pushes the results
# to a Hub dataset repository.
name: Benchmark v2 Framework

# NOTE(review): only `workflow_dispatch` (with no inputs) is declared here, yet
# the job reads `inputs.*` and its `if:` only passes for `pull_request` or
# `schedule` events - confirm the intended triggers/inputs.
on:
  workflow_dispatch:

env:
  HF_HOME: /mnt/cache
  # Quoted so the value is always the string "yes", never a YAML boolean.
  TRANSFORMERS_IS_CI: 'yes'
  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
  benchmark-v2:
    name: Benchmark v2
    runs-on: ${{ inputs.runner }}
    if: |
      (github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark')) ||
      (github.event_name == 'schedule')
    container:
      image: ${{ inputs.container_image }}
      options: ${{ inputs.container_options }}
    steps:
      - name: Get repo
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.commit_sha || github.sha }}

      - name: Install benchmark dependencies
        run: |
          python3 -m pip install -r benchmark_v2/requirements.txt

      - name: Reinstall transformers in edit mode
        run: |
          python3 -m pip uninstall -y transformers
          python3 -m pip install -e ".[torch]"

      - name: Show installed libraries and their versions
        run: |
          python3 -m pip list
          python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')"
          python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
          python3 -c "import torch; print(f'CUDA device count: {torch.cuda.device_count()}')" || true
          nvidia-smi || true

      - name: Run benchmark v2
        working-directory: benchmark_v2
        # Expressions are passed through environment variables instead of being
        # interpolated into the script text, so their values cannot be parsed
        # as shell syntax and the token never appears in the rendered script.
        run: |
          echo "Running benchmarks"
          python3 run_benchmarks.py \
            --commit-id "$BENCHMARK_COMMIT_SHA" \
            --run-id "$BENCHMARK_RUN_ID" \
            --push-to-hub "$BENCHMARK_REPO_ID" \
            --token "$BENCHMARK_UPLOAD_TOKEN" \
            --log-level INFO
        env:
          HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
          BENCHMARK_COMMIT_SHA: ${{ inputs.commit_sha || github.sha }}
          BENCHMARK_RUN_ID: ${{ inputs.run_id }}
          BENCHMARK_REPO_ID: ${{ inputs.benchmark_repo_id }}
          BENCHMARK_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}

View File

@ -1,17 +0,0 @@
# Calls the shared benchmark_v2 workflow with the A10 single-GPU runner settings.
name: Benchmark v2 Scheduled Runner - A10 Single-GPU

# Despite the workflow name, only a manual trigger is declared here.
on:
  workflow_dispatch:

jobs:
  benchmark-v2-default:
    name: Benchmark v2 - Default Models
    uses: ./.github/workflows/benchmark_v2.yml
    with:
      runner: aws-g5-4xlarge-cache-use1-public-80
      container_image: huggingface/transformers-pytorch-gpu
      # Single-quoted so the embedded double quotes are kept verbatim.
      container_options: '--gpus all --privileged --ipc host --shm-size "16gb"'
      commit_sha: ${{ github.sha }}
      run_id: ${{ github.run_id }}
      benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
    # Forward all of the caller's secrets to the called workflow.
    secrets: inherit

View File

@ -1,17 +0,0 @@
# Calls the shared benchmark_v2 workflow with the AMD MI325 single-GPU runner settings.
name: Benchmark v2 Scheduled Runner - MI325 Single-GPU

# Despite the workflow name, only a manual trigger is declared here.
on:
  workflow_dispatch:

jobs:
  benchmark-v2-default:
    name: Benchmark v2 - Default Models
    uses: ./.github/workflows/benchmark_v2.yml
    with:
      runner: amd-mi325-ci-1gpu
      container_image: huggingface/transformers-pytorch-amd-gpu
      # Single-quoted so the embedded double quotes and the `:` in the volume
      # mapping are kept verbatim.
      container_options: '--device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache'
      commit_sha: ${{ github.sha }}
      run_id: ${{ github.run_id }}
      benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
    # Forward all of the caller's secrets to the called workflow.
    secrets: inherit

View File

@ -26,7 +26,7 @@ jobs:
strategy:
matrix:
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "exotic-models", "examples-torch"]
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"]
continue-on-error: true
steps:

View File

@ -5,7 +5,6 @@ on:
branches:
- build_ci_docker_image*
repository_dispatch:
workflow_dispatch:
workflow_call:
inputs:
image_postfix:
@ -20,7 +19,7 @@ concurrency:
jobs:
latest-docker:
name: "Latest PyTorch [dev]"
name: "Latest PyTorch + TensorFlow [dev]"
runs-on:
group: aws-general-8-plus
steps:
@ -71,7 +70,7 @@ jobs:
latest-torch-deepspeed-docker:
name: "Latest PyTorch + DeepSpeed"
runs-on:
group: aws-g4dn-2xlarge-cache
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@ -222,7 +221,7 @@ jobs:
latest-pytorch-amd:
name: "Latest PyTorch (AMD) [dev]"
runs-on:
group: aws-highcpu-32-priv
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@ -268,6 +267,44 @@ jobs:
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# Builds the TensorFlow GPU image from ./docker/transformers-tensorflow-gpu,
# pushes it, and reports the job status to Slack.
latest-tensorflow:
  name: "Latest TensorFlow [dev]"
  # Push CI doesn't need this image
  if: inputs.image_postfix != '-push-ci'
  runs-on:
    group: aws-general-8-plus
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Check out code
      uses: actions/checkout@v4

    - name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}

    - name: Build and push
      uses: docker/build-push-action@v5
      with:
        context: ./docker/transformers-tensorflow-gpu
        build-args: |
          REF=main
        push: true
        tags: huggingface/transformers-tensorflow-gpu

    # `always()` makes the notification run even when an earlier step failed.
    - name: Post to Slack
      if: always()
      uses: huggingface/hf-workflows/.github/actions/post-slack@main
      with:
        slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
        title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build
        status: ${{ job.status }}
        slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-pytorch-deepspeed-amd:
name: "PyTorch + DeepSpeed (AMD) [dev]"
runs-on:

View File

@ -2,10 +2,6 @@ name: Build docker images (Nightly CI)
on:
workflow_call:
inputs:
job:
required: true
type: string
push:
branches:
- build_nightly_ci_docker_image*
@ -16,8 +12,7 @@ concurrency:
jobs:
latest-with-torch-nightly-docker:
name: "Nightly PyTorch"
if: inputs.job == 'latest-with-torch-nightly-docker' || inputs.job == ''
name: "Nightly PyTorch + Stable TensorFlow"
runs-on:
group: aws-general-8-plus
steps:
@ -46,9 +41,8 @@ jobs:
nightly-torch-deepspeed-docker:
name: "Nightly PyTorch + DeepSpeed"
if: inputs.job == 'nightly-torch-deepspeed-docker' || inputs.job == ''
runs-on:
group: aws-g4dn-2xlarge-cache
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx

View File

@ -16,19 +16,7 @@ jobs:
commit_sha: ${{ github.sha }}
package: transformers
notebook_folder: transformers_doc
languages: en
custom_container: huggingface/transformers-doc-builder
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
build_other_lang:
uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
with:
commit_sha: ${{ github.sha }}
package: transformers
notebook_folder: transformers_doc
languages: ar de es fr hi it ja ko pt zh
languages: ar de en es fr hi it ko pt tr zh ja te
custom_container: huggingface/transformers-doc-builder
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}

View File

@ -14,4 +14,5 @@ jobs:
commit_sha: ${{ github.event.pull_request.head.sha }}
pr_number: ${{ github.event.number }}
package: transformers
languages: en
languages: ar de en es fr hi it ko pt tr zh ja te
custom_container: huggingface/transformers-doc-builder

View File

@ -0,0 +1,129 @@
# Reusable workflow: downloads the `ci_results_run_models_gpu` artifact, runs
# `utils/check_bad_commit.py` between `inputs.start_sha` and the commit of the last
# daily CI run, then posts the processed report to Slack.
name: Process failed tests

on:
  workflow_call:
    inputs:
      # Docker image used as the job container.
      docker:
        required: true
        type: string
      # Commit to check out before reinstalling transformers and running the check.
      start_sha:
        required: true
        type: string

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  RUN_SLOW: yes
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
  run_models_gpu:
    name: " "
    runs-on:
      group: aws-g4dn-2xlarge-cache
    container:
      image: ${{ inputs.docker }}
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: ci_results_run_models_gpu
          path: /transformers/ci_results_run_models_gpu

      # First check out the triggering commit so that `utils/get_previous_daily_ci.py`
      # is taken from it when computing END_SHA below.
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ github.sha }}

      - name: Get target commit
        working-directory: /transformers/utils
        env:
          # The secret is handed over through the step environment instead of being
          # interpolated into the script text; the Python snippet reads os.environ["TOKEN"].
          TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
        run: |
          echo "END_SHA=$(python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV

      - name: Checkout to `start_sha`
        working-directory: /transformers
        run: git fetch && git checkout ${{ inputs.start_sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Check failed tests
        working-directory: /transformers
        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json

      - name: Show results
        working-directory: /transformers
        run: |
          ls -l new_model_failures_with_bad_commit.json
          cat new_model_failures_with_bad_commit.json

      - name: Checkout back
        working-directory: /transformers
        run: |
          git checkout ${{ inputs.start_sha }}

      # NOTE(review): the report script is run twice on purpose or by accident - this
      # first run only writes to the job log, the second one captures the output into
      # REPORT_TEXT. Any side effect of the script therefore happens twice; confirm
      # that this is intended.
      - name: Process report
        shell: bash
        working-directory: /transformers
        env:
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          python3 utils/process_bad_commit_report.py

      # Store the (multi-line) script output in the REPORT_TEXT environment variable
      # using the `NAME<<EOF ... EOF` syntax of $GITHUB_ENV.
      - name: Process report
        shell: bash
        working-directory: /transformers
        env:
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          {
            echo 'REPORT_TEXT<<EOF'
            python3 utils/process_bad_commit_report.py
            echo EOF
          } >> "$GITHUB_ENV"

      # Skipped when the report text ends with `{}`.
      - name: Send processed report
        if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
        with:
          # Slack channel id, channel name, or user id to post message.
          # See also: https://api.slack.com/methods/chat.postMessage#channels
          channel-id: '#transformers-ci-feedback-tests'
          # For posting a rich message using Block Kit
          payload: |
            {
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "${{ env.REPORT_TEXT }}"
                  }
                }
              ]
            }
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

View File

@ -1,255 +0,0 @@
# Reusable workflow with two jobs:
#   1. `check_new_failures` runs `utils/check_bad_commit.py` on the `new_failures.json`
#      of the given CI job (when that file exists) and uploads the result as an artifact.
#   2. `process_new_failures_with_commit_info` turns that result into a report and
#      sends it to the requested Slack channel.
name: Process failed tests

on:
  workflow_call:
    inputs:
      # Docker image used as the job container.
      docker:
        required: true
        type: string
      # Commit to check out before reinstalling transformers and running the check.
      start_sha:
        required: true
        type: string
      # CI job name; used to build artifact and file names (`ci_results_<job>`).
      job:
        required: true
        type: string
      # Slack channel name, without the leading `#`.
      slack_report_channel:
        required: true
        type: string
      # Used in the Slack report title.
      ci_event:
        required: true
        type: string
      # Forwarded to `utils/process_bad_commit_report.py` as REPORT_REPO_ID.
      report_repo_id:
        required: true
        type: string
      # Optional commit to use instead of `github.sha` for the "Update clone" steps.
      commit_sha:
        required: false
        type: string

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  RUN_SLOW: yes
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
  check_new_failures:
    name: "Find commits for new failing tests"
    strategy:
      matrix:
        run_idx: [1]
    runs-on:
      group: aws-g5-4xlarge-cache
    outputs:
      # 'true' when `new_failures.json` exists for this job; gates the second job.
      process: ${{ steps.check_file.outputs.process }}
    container:
      image: ${{ inputs.docker }}
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: ci_results_${{ inputs.job }}
          path: /transformers/ci_results_${{ inputs.job }}

      # Publishes `process` both as an env var (for the `if:` of the following steps)
      # and as a step output (for the job-level output above).
      - name: Check file
        id: check_file
        working-directory: /transformers
        run: |
          # The backticks are escaped: unescaped backticks inside double quotes are a
          # command substitution, which would try to execute the JSON path.
          if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
            echo "\`ci_results_${{ inputs.job }}/new_failures.json\` exists, continue ..."
            echo "process=true" >> $GITHUB_ENV
            echo "process=true" >> $GITHUB_OUTPUT
          else
            echo "\`ci_results_${{ inputs.job }}/new_failures.json\` doesn't exist, abort."
            echo "process=false" >> $GITHUB_ENV
            echo "process=false" >> $GITHUB_OUTPUT
          fi

      - uses: actions/download-artifact@v4
        if: ${{ env.process == 'true' }}
        with:
          pattern: setup_values*
          path: setup_values
          merge-multiple: true

      # Each variable is set to an empty value when the corresponding file is missing.
      - name: Prepare some setup values
        if: ${{ env.process == 'true' }}
        run: |
          if [ -f setup_values/prev_workflow_run_id.txt ]; then
            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

          if [ -f setup_values/other_workflow_run_id.txt ]; then
            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

      - name: Update clone
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}

      - name: Get target commit
        working-directory: /transformers/utils
        if: ${{ env.process == 'true' }}
        env:
          # The secret is handed over through the step environment instead of being
          # interpolated into the script text; the Python snippet reads os.environ["TOKEN"].
          TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
        run: |
          echo "END_SHA=$(python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV

      - name: Checkout to `start_sha`
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: git fetch && git checkout ${{ inputs.start_sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: NVIDIA-SMI
        if: ${{ env.process == 'true' }}
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: |
          python3 utils/print_env.py

      - name: Install pytest-flakefinder
        if: ${{ env.process == 'true' }}
        run: python3 -m pip install pytest-flakefinder

      - name: Show installed libraries and their versions
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: pip freeze

      - name: Check failed tests
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json

      - name: Show results
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: |
          ls -l new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
          cat new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json

      # NOTE(review): unlike the steps above, this one is not gated on `env.process`,
      # so it also runs when no result file was produced - confirm this is intended.
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}
          path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json

  process_new_failures_with_commit_info:
    name: "process bad commit reports"
    needs: check_new_failures
    if: needs.check_new_failures.outputs.process == 'true'
    runs-on:
      group: aws-g5-4xlarge-cache
    container:
      image: ${{ inputs.docker }}
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: ci_results_${{ inputs.job }}
          path: /transformers/ci_results_${{ inputs.job }}

      - uses: actions/download-artifact@v4
        with:
          pattern: new_failures_with_bad_commit_${{ inputs.job }}*
          path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}
          merge-multiple: true

      - name: Check files
        working-directory: /transformers
        run: |
          ls -la /transformers
          ls -la /transformers/new_failures_with_bad_commit_${{ inputs.job }}

      # Currently, we only run with a single runner by using `run_idx: [1]`. We might try to run with multiple runners
      # to further reduce the false positive caused by flaky tests, which requires further processing to merge reports.
      - name: Merge files
        shell: bash
        working-directory: /transformers
        run: |
          cp /transformers/new_failures_with_bad_commit_${{ inputs.job }}/new_failures_with_bad_commit_${{ inputs.job }}_1.json new_failures_with_bad_commit.json

      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}

      # NOTE(review): the report script is run twice - this first run only writes to
      # the job log, the second one captures the output into REPORT_TEXT. Any side
      # effect of the script therefore happens twice; confirm that this is intended.
      - name: Process report
        shell: bash
        working-directory: /transformers
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
          JOB_NAME: ${{ inputs.job }}
          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
        run: |
          python3 utils/process_bad_commit_report.py

      # Store the (multi-line) script output in the REPORT_TEXT environment variable
      # using the `NAME<<EOF ... EOF` syntax of $GITHUB_ENV.
      - name: Process report
        shell: bash
        working-directory: /transformers
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
          JOB_NAME: ${{ inputs.job }}
          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
        run: |
          {
            echo 'REPORT_TEXT<<EOF'
            python3 utils/process_bad_commit_report.py
            echo EOF
          } >> "$GITHUB_ENV"

      # Builds the Slack header from `ci_event` and the test name that
      # `utils/notification_service.py` maps the job to.
      - name: Prepare Slack report title
        working-directory: /transformers
        run: |
          pip install slack_sdk
          echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV

      # Skipped when the report text ends with `{}`.
      - name: Send processed report
        if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
        with:
          # Slack channel id, channel name, or user id to post message.
          # See also: https://api.slack.com/methods/chat.postMessage#channels
          channel-id: '#${{ inputs.slack_report_channel }}'
          # For posting a rich message using Block Kit
          payload: |
            {
              "blocks": [
                {
                  "type": "header",
                  "text": {
                    "type": "plain_text",
                    "text": "${{ env.title }}"
                  }
                },
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "${{ env.REPORT_TEXT }}"
                  }
                }
              ]
            }
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

View File

@ -1,43 +0,0 @@
# Reusable workflow: downloads all artifacts of the run and calls
# `utils/collated_reports.py` on them.
name: CI collated reports

on:
  workflow_call:
    inputs:
      job:
        required: true
        type: string
      report_repo_id:
        required: true
        type: string
      machine_type:
        required: true
        type: string
      gpu_name:
        description: Name of the GPU used for the job. It's enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
        required: true
        type: string

jobs:
  collated_reports:
    name: Collated reports
    runs-on: ubuntu-22.04
    # Run even when jobs this one depends on in the caller failed.
    if: always()
    steps:
      - uses: actions/checkout@v4
      # No `name`/`path` given: every artifact of the run is downloaded into the workspace.
      - uses: actions/download-artifact@v4

      - name: Collated reports
        shell: bash
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_SHA: ${{ github.sha }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        # Every value is quoted so that an input containing spaces stays one argument.
        run: |
          pip install huggingface_hub
          python3 utils/collated_reports.py \
            --path . \
            --machine-type "${{ inputs.machine_type }}" \
            --commit-hash "${{ env.CI_SHA }}" \
            --job "${{ inputs.job }}" \
            --report-repo-id "${{ inputs.report_repo_id }}" \
            --gpu-name "${{ inputs.gpu_name }}"

View File

@ -16,6 +16,7 @@ env:
RUN_SLOW: yes
OMP_NUM_THREADS: 16
MKL_NUM_THREADS: 16
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
jobs:
@ -27,10 +28,10 @@ jobs:
matrix:
split_keys: ${{ fromJson(inputs.split_keys) }}
runs-on:
group: aws-g5-4xlarge-cache
group: aws-g4dn-2xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers

View File

@ -15,10 +15,10 @@ jobs:
setup:
name: Setup
runs-on:
group: aws-g5-4xlarge-cache
group: aws-g4dn-2xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
job_splits: ${{ steps.set-matrix.outputs.job_splits }}
split_keys: ${{ steps.set-matrix.outputs.split_keys }}

View File

@ -1,157 +0,0 @@
# Reusable workflow: given a PR number, queries the GitHub REST API for the pull
# request, its head commit, its merge commit and its changed files, and exposes
# the results as workflow outputs.
name: Get PR commit SHA

on:
  workflow_call:
    inputs:
      pr_number:
        required: true
        type: string
    outputs:
      PR_HEAD_REPO_FULL_NAME:
        description: "The full name of the repository from which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
      PR_BASE_REPO_FULL_NAME:
        description: "The full name of the repository to which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_FULL_NAME }}
      PR_HEAD_REPO_OWNER:
        description: "The owner of the repository from which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}
      PR_BASE_REPO_OWNER:
        description: "The owner of the repository to which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_OWNER }}
      PR_HEAD_REPO_NAME:
        description: "The name of the repository from which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}
      PR_BASE_REPO_NAME:
        description: "The name of the repository to which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_NAME }}
      PR_HEAD_REF:
        description: "The branch name of the pull request in the head repository"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REF }}
      PR_BASE_REF:
        description: "The branch name in the base repository (to merge into)"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REF }}
      PR_HEAD_SHA:
        description: "The head sha of the pull request branch in the head repository"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_SHA }}
      PR_BASE_SHA:
        description: "The head sha of the target branch in the base repository"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_SHA }}
      PR_MERGE_COMMIT_SHA:
        description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
      PR_HEAD_COMMIT_DATE:
        description: "The date of the head sha of the pull request branch in the head repository"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }}
      PR_MERGE_COMMIT_DATE:
        description: "The date of the merge commit for the pull request (created by GitHub) in the base repository"
        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
      PR_HEAD_COMMIT_TIMESTAMP:
        description: "The timestamp of the head sha of the pull request branch in the head repository"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_TIMESTAMP }}
      PR_MERGE_COMMIT_TIMESTAMP:
        description: "The timestamp of the merge commit for the pull request (created by GitHub) in the base repository"
        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
      PR:
        description: "The PR"
        value: ${{ jobs.get-pr-info.outputs.PR }}
      PR_FILES:
        description: "The files touched in the PR"
        value: ${{ jobs.get-pr-info.outputs.PR_FILES }}

jobs:
  get-pr-info:
    runs-on: ubuntu-22.04
    name: Get PR commit SHA better
    # Nothing to look up when the caller has no PR number.
    if: ${{ inputs.pr_number != '' }}
    outputs:
      PR_HEAD_REPO_FULL_NAME: ${{ steps.pr_info.outputs.head_repo_full_name }}
      PR_BASE_REPO_FULL_NAME: ${{ steps.pr_info.outputs.base_repo_full_name }}
      PR_HEAD_REPO_OWNER: ${{ steps.pr_info.outputs.head_repo_owner }}
      PR_BASE_REPO_OWNER: ${{ steps.pr_info.outputs.base_repo_owner }}
      PR_HEAD_REPO_NAME: ${{ steps.pr_info.outputs.head_repo_name }}
      PR_BASE_REPO_NAME: ${{ steps.pr_info.outputs.base_repo_name }}
      PR_HEAD_REF: ${{ steps.pr_info.outputs.head_ref }}
      PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }}
      PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }}
      PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }}
      PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }}
      PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }}
      PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }}
      PR_HEAD_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.head_commit_timestamp }}
      PR_MERGE_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.merge_commit_timestamp }}
      PR: ${{ steps.pr_info.outputs.pr }}
      PR_FILES: ${{ steps.pr_info.outputs.files }}
    steps:
      - name: Extract PR details
        id: pr_info
        uses: actions/github-script@v6
        env:
          # Passed through the environment instead of being spliced into the script source.
          PR_NUMBER: ${{ inputs.pr_number }}
        with:
          script: |
            const pull_number = Number(process.env.PR_NUMBER);

            const { data: pr } = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: pull_number
            });

            // Look the head commit up by sha rather than by branch name, so that the
            // reported date always belongs to the commit published as `head_sha`.
            const { data: head_commit } = await github.rest.repos.getCommit({
              owner: pr.head.repo.owner.login,
              repo: pr.head.repo.name,
              ref: pr.head.sha
            });

            const { data: merge_commit } = await github.rest.repos.getCommit({
              owner: pr.base.repo.owner.login,
              repo: pr.base.repo.name,
              ref: pr.merge_commit_sha,
            });

            // NOTE(review): this is a single, unpaginated request, so only the first
            // page of changed files is returned - confirm that is acceptable for callers.
            const { data: files } = await github.rest.pulls.listFiles({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: pull_number
            });

            core.setOutput('head_repo_full_name', pr.head.repo.full_name);
            core.setOutput('base_repo_full_name', pr.base.repo.full_name);
            core.setOutput('head_repo_owner', pr.head.repo.owner.login);
            core.setOutput('base_repo_owner', pr.base.repo.owner.login);
            core.setOutput('head_repo_name', pr.head.repo.name);
            core.setOutput('base_repo_name', pr.base.repo.name);
            core.setOutput('head_ref', pr.head.ref);
            core.setOutput('base_ref', pr.base.ref);
            core.setOutput('head_sha', pr.head.sha);
            core.setOutput('base_sha', pr.base.sha);
            core.setOutput('merge_commit_sha', pr.merge_commit_sha);
            core.setOutput('pr', pr);

            core.setOutput('head_commit_date', head_commit.commit.committer.date);
            core.setOutput('merge_commit_date', merge_commit.commit.committer.date);

            core.setOutput('files', files);

            console.log('PR head commit:', {
              head_commit: head_commit,
              commit: head_commit.commit,
              date: head_commit.commit.committer.date
            });

            console.log('PR merge commit:', {
              merge_commit: merge_commit,
              commit: merge_commit.commit,
              date: merge_commit.commit.committer.date
            });

      # Converts the committer dates from the previous step to epoch seconds (`date +%s`).
      - name: Convert dates to timestamps
        id: get_timestamps
        run: |
          head_commit_date="${{ steps.pr_info.outputs.head_commit_date }}"
          merge_commit_date="${{ steps.pr_info.outputs.merge_commit_date }}"
          echo $head_commit_date
          echo $merge_commit_date
          head_commit_timestamp=$(date -d "$head_commit_date" +%s)
          merge_commit_timestamp=$(date -d "$merge_commit_date" +%s)
          echo $head_commit_timestamp
          echo $merge_commit_timestamp
          echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT
          echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT

View File

@ -1,36 +0,0 @@
# Reusable workflow: derives the pull request number from the triggering event
# payload and exposes it as the `PR_NUMBER` output (empty when none is found).
name: Get PR number

on:
  workflow_call:
    outputs:
      PR_NUMBER:
        description: "The extracted PR number"
        value: ${{ jobs.get-pr-number.outputs.PR_NUMBER }}

jobs:
  get-pr-number:
    name: Get PR number
    runs-on: ubuntu-22.04
    outputs:
      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
    steps:
      # Tried in order:
      #   1. the event carries an issue that is a pull request -> issue number
      #   2. the event carries a pull request with a number    -> that number
      #   3. the event carries a pull request                  -> event-level number
      #   4. otherwise                                         -> empty value
      - name: Get PR number
        shell: bash
        run: |
          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
            echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
          elif [[ "${{ github.event.pull_request.number }}" != "" ]]; then
            echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
          elif [[ "${{ github.event.pull_request }}" != "" ]]; then
            echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_ENV
          else
            echo "PR_NUMBER=" >> $GITHUB_ENV
          fi

      # Print the value to the job log.
      - name: Check PR number
        shell: bash
        run: |
          echo "${{ env.PR_NUMBER }}"

      # Re-publish the env var as a step output so the job output above can read it.
      - name: Set PR number
        id: set_pr_number
        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"

View File

@ -12,21 +12,11 @@ on:
slice_id:
required: true
type: number
docker:
runner:
required: true
type: string
commit_sha:
required: false
type: string
report_name_prefix:
required: false
default: run_models_gpu
type: string
runner_type:
required: false
type: string
report_repo_id:
required: false
docker:
required: true
type: string
env:
@ -38,7 +28,9 @@ env:
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
@ -54,8 +46,6 @@ jobs:
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
machine_type: ${{ steps.set_machine_type.outputs.machine_type }}
steps:
- name: Echo input and matrix info
shell: bash
@ -77,7 +67,7 @@ jobs:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
@ -109,15 +99,14 @@ jobs:
run: pip freeze
- name: Set `machine_type` for report and artifact names
id: set_machine_type
working-directory: /transformers
shell: bash
run: |
echo "${{ inputs.machine_type }}"
if [ "${{ inputs.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ inputs.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ inputs.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ inputs.machine_type }}
@ -125,58 +114,26 @@ jobs:
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
echo "machine_type=$machine_type" >> $GITHUB_OUTPUT
- name: Create report directory if it doesn't exist
shell: bash
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
echo "dummy" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/dummy.txt
ls -la /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
- name: Run all tests on GPU
working-directory: /transformers
run: |
script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt
ls -la
# Extract the exit code from the output file
EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2)
exit ${EXIT_CODE:-1}
run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
# This step is only to show information on Github Actions log.
# Always mark this step as successful, even if the report directory or the file `failures_short.txt` in it doesn't exist
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/failures_short.txt
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Captured information
if: ${{ failure() }}
continue-on-error: true
- name: Run test
shell: bash
run: |
cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/captured_info.txt
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- name: Copy test_outputs.txt
if: ${{ always() }}
continue-on-error: true
run: |
cp /transformers/test_outputs.txt /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
- name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
collated_reports:
name: Collated Reports
if: ${{ always() }}
needs: run_models_gpu
uses: huggingface/transformers/.github/workflows/collated-reports.yml@main
with:
job: run_models_gpu
report_repo_id: ${{ inputs.report_repo_id }}
gpu_name: ${{ inputs.runner_type }}
machine_type: ${{ needs.run_models_gpu.outputs.machine_type }}
secrets: inherit
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

129
.github/workflows/model_jobs_amd.yml vendored Normal file
View File

@ -0,0 +1,129 @@
# Reusable workflow: runs the tests of one slice of model folders on self-hosted
# AMD GPU runners, one matrix entry per folder, and uploads each folder's reports.
name: model jobs

on:
  workflow_call:
    inputs:
      # JSON string; indexed by `slice_id` to obtain the list of folders to test.
      folder_slices:
        required: true
        type: string
      # Used as a runner label and as the prefix of report / artifact names.
      machine_type:
        required: true
        type: string
      slice_id:
        required: true
        type: number
      # Additional runner label.
      runner:
        required: true
        type: string
      # Docker image used as the job container.
      docker:
        required: true
        type: string

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  RUN_SLOW: yes
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
  run_models_gpu:
    name: " "
    strategy:
      max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
      fail-fast: false
      matrix:
        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
    runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
    container:
      image: ${{ inputs.docker }}
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "${{ inputs.folder_slices }}"
          echo "${{ matrix.folders }}"
          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"

      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      # Only for images whose name contains `-past-`.
      - name: Update / Install some packages (for Past CI)
        if: ${{ contains(inputs.docker, '-past-') }}
        working-directory: /transformers
        run: |
          python3 -m pip install -U datasets

      # Only for images whose name contains both `-past-` and `-pytorch-`.
      - name: Update / Install some packages (for Past CI)
        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
        working-directory: /transformers
        run: |
          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate

      - name: ROCM-SMI
        run: |
          rocm-smi

      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14

      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      # Tests marked `not_device_test` are deselected.
      - name: Run all tests on GPU
        working-directory: /transformers
        run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"

      # Informational only: never fails the job, even if the file is missing.
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      # Creates the report directory (if needed) and drops a placeholder file into it.
      - name: Run test
        shell: bash
        run: |
          mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
          echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

      # The artifact name uses `env.matrix_folders` (no `/`), the path uses `matrix.folders`.
      - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

View File

@ -1,120 +0,0 @@
# Reusable workflow: runs the tests of one slice of model folders inside a Habana
# Gaudi container, one matrix entry per folder, and uploads each folder's reports.
name: model jobs

on:
  workflow_call:
    inputs:
      # JSON string; indexed by `slice_id` to obtain the list of folders to test.
      folder_slices:
        required: true
        type: string
      slice_id:
        required: true
        type: number
      # Runner group the job is scheduled on.
      runner:
        required: true
        type: string
      # `1gaudi` / `2gaudi` are renamed to `single-gpu` / `multi-gpu` in report names.
      machine_type:
        required: true
        type: string
      report_name_prefix:
        required: false
        default: run_models_gpu
        type: string

env:
  RUN_SLOW: yes
  PT_HPU_LAZY_MODE: 0
  TRANSFORMERS_IS_CI: yes
  PT_ENABLE_INT64_SUPPORT: 1
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  HF_HOME: /mnt/cache/.cache/huggingface

jobs:
  run_models_gpu:
    name: " "
    strategy:
      max-parallel: 8
      fail-fast: false
      matrix:
        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
    runs-on:
      group: ${{ inputs.runner }}
    container:
      image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
      # Folded scalar: the lines below are joined with single spaces into one option string.
      options: >-
        --runtime=habana
        -v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
        --env OMPI_MCA_btl_vader_single_copy_mechanism=none
        --env HABANA_VISIBLE_DEVICES
        --env HABANA_VISIBLE_MODULES
        --cap-add=sys_nice
        --shm-size=64G
    steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "${{ inputs.folder_slices }}"
          echo "${{ matrix.folders }}"
          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"

      # Stores the folder with `models/` replaced by `models_` in the `matrix_folders`
      # env var, which the artifact name below uses.
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install dependencies
        run: |
          pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn

      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"

      - name: Environment
        run: python3 utils/print_env.py

      - name: Show installed libraries and their versions
        run: pip freeze

      # Any value other than `1gaudi` / `2gaudi` is used unchanged.
      - name: Set `machine_type` for report and artifact names
        shell: bash
        run: |
          if [ "${{ inputs.machine_type }}" = "1gaudi" ]; then
            machine_type=single-gpu
          elif [ "${{ inputs.machine_type }}" = "2gaudi" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ inputs.machine_type }}
          fi
          echo "machine_type=$machine_type" >> $GITHUB_ENV

      - name: Run all tests on Gaudi
        run: python3 -m pytest -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

      # Informational only: never fails the job, even if the file is missing.
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt

      # Creates the report directory (if needed) and drops a placeholder file into it.
      - name: Run test
        shell: bash
        run: |
          mkdir -p reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
          echo "hello" > reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports"

      # The artifact name uses `env.matrix_folders` (no `/`), the path uses `matrix.folders`.
      - name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
          path: reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports

View File

@ -1,68 +0,0 @@
# Notifies core maintainers on Slack when a push to `main` touches a model's
# `modeling_*` file and the helper script reports a new model.
name: New model PR merged notification

on:
  push:
    branches: [main]
    paths:
      - "src/transformers/models/*/modeling_*"

jobs:
  notify_new_model:
    name: Notify new model
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # The last line printed by the helper is stored as NEW_MODEL; the commit
      # at HEAD is stored as COMMIT_SHA. Both are read by the steps below.
      - name: Check new model
        shell: bash
        run: |
          python -m pip install gitpython
          python -c 'from utils.pr_slow_ci_models import get_new_model; new_model = get_new_model(diff_with_last_commit=True); print(new_model)' | tee output.txt
          echo "NEW_MODEL=$(tail -n 1 output.txt)" >> $GITHUB_ENV
          echo "COMMIT_SHA=$(git log -1 --format=%H)" >> $GITHUB_ENV

      # Every remaining step is skipped when NEW_MODEL is empty.
      - name: print commit sha
        if: env.NEW_MODEL != ''
        shell: bash
        run: echo "$COMMIT_SHA"

      - name: print new model
        if: env.NEW_MODEL != ''
        shell: bash
        run: echo "$NEW_MODEL"

      - name: Notify
        if: env.NEW_MODEL != ''
        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
        with:
          # Slack channel id, channel name, or user id to post message.
          # See also: https://api.slack.com/methods/chat.postMessage#channels
          channel-id: transformers-new-model-notification
          # For posting a rich message using Block Kit
          payload: |
            {
              "blocks": [
                {
                  "type": "header",
                  "text": {
                    "type": "plain_text",
                    "text": "New model!",
                    "emoji": true
                  }
                },
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "<https://github.com/huggingface/transformers/commit/${{ env.COMMIT_SHA }}|New model: ${{ env.NEW_MODEL }}> GH_ArthurZucker, GH_lysandrejik, GH_ydshieh\ncommit SHA: ${{ env.COMMIT_SHA }}"
                  }
                }
              ]
            }

View File

@ -1,18 +0,0 @@
# Comment "@bot /style" on a PR to run this bot.
name: Style Bot

on:
  issue_comment:
    types:
      - created

permissions:
  pull-requests: write

jobs:
  style:
    # All of the work is delegated to the reusable workflow referenced here.
    uses: huggingface/huggingface_hub/.github/workflows/style-bot-action.yml@main
    secrets:
      bot_token: ${{ secrets.HF_STYLE_BOT_ACTION }}
    with:
      python_quality_dependencies: '[quality]'
      style_command_type: 'default'

View File

@ -1,134 +0,0 @@
# Builds the documentation for a PR when a maintainer comments `build-doc`,
# and mirrors the outcome to a `custom-doc-build` commit status on the PR head.
name: PR - build doc via comment

on:
  issue_comment:
    types:
      - created
    # NOTE(review): branch filters are documented for push / pull_request
    # events; confirm this filter has any effect for `issue_comment`.
    branches-ignore:
      - main

concurrency:
  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'build-doc') }}
  cancel-in-progress: true

# No default token permissions; each job requests what it needs.
permissions: {}

jobs:
  # Runs only for `build-doc` comments on open issues/PRs written by one of
  # the listed accounts.
  get-pr-number:
    name: Get PR number
    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'build-doc')) }}
    uses: ./.github/workflows/get-pr-number.yml

  get-pr-info:
    name: Get PR commit SHA
    needs: get-pr-number
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    uses: ./.github/workflows/get-pr-info.yml
    with:
      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}

  verity_pr_commit:
    name: Verity PR commit corresponds to a specific event by comparing timestamps
    # `get-pr-number` has to be a direct dependency: the `needs` context only
    # exposes direct dependencies, so without it the `if` below compares an
    # empty value and the job never runs.
    needs: [get-pr-number, get-pr-info]
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    runs-on: ubuntu-22.04
    env:
      COMMENT_DATE: ${{ github.event.comment.created_at }}
      PR_MERGE_COMMIT_DATE: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
      PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
    steps:
      - run: |
          COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
          echo "COMMENT_DATE: $COMMENT_DATE"
          echo "PR_MERGE_COMMIT_DATE: $PR_MERGE_COMMIT_DATE"
          echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
          echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
          # Fail closed: continue only when the comment is strictly newer than
          # the merge commit. An empty or non-numeric value makes `[` error
          # out, which the negation also treats as "abort".
          if ! [ "$COMMENT_TIMESTAMP" -gt "$PR_MERGE_COMMIT_TIMESTAMP" ]; then
            echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
            exit 1;
          fi

  create_run:
    name: Create run
    # Depends on `verity_pr_commit` so that a failed timestamp check actually
    # stops the run instead of being a side job nobody waits for.
    needs: [get-pr-number, get-pr-info, verity_pr_commit]
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }}
    permissions:
      statuses: write
    runs-on: ubuntu-22.04
    steps:
      - name: Create Run
        id: create_run
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
          # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/statuses/${{ needs.get-pr-info.outputs.PR_HEAD_SHA }} \
            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Custom doc building job" -f "context=custom-doc-build"

  reply_to_comment:
    name: Reply to the comment
    if: ${{ needs.create_run.result == 'success' }}
    needs: [get-pr-number, create_run]
    permissions:
      pull-requests: write
    runs-on: ubuntu-22.04
    steps:
      - name: Reply to the comment
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
            -f "body=[Building docs for all languages...](${{ env.GITHUB_RUN_URL }})"

  build-doc:
    name: Build doc
    needs: [get-pr-number, get-pr-info, verity_pr_commit]
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }}
    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
    with:
      commit_sha: ${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}
      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
      package: transformers
      languages: ar de en es fr hi it ja ko pt zh

  update_run_status:
    name: Update Check Run Status
    needs: [ get-pr-info, create_run, build-doc ]
    permissions:
      statuses: write
    # Runs even when `build-doc` failed, as long as the pending status was created.
    if: ${{ always() && needs.create_run.result == 'success' }}
    runs-on: ubuntu-22.04
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
      # Must look at `build-doc`: `create_run` is always `success` here (see
      # the job-level `if`), so testing it would report every build as OK.
      STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.build-doc.result) }}
    steps:
      - name: Get `build-doc` job status
        run: |
          echo "${{ needs.build-doc.result }}"
          echo $STATUS_OK
          if [ "$STATUS_OK" = "true" ]; then
            echo "STATUS=success" >> $GITHUB_ENV
          else
            echo "STATUS=failure" >> $GITHUB_ENV
          fi
      - name: Update PR commit statuses
        run: |
          echo "${{ needs.build-doc.result }}"
          echo "${{ env.STATUS }}"
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/statuses/${{ needs.get-pr-info.outputs.PR_HEAD_SHA }} \
            -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Custom doc building job" -f "context=custom-doc-build"

View File

@ -1,177 +0,0 @@
# Suggests, as a PR comment, which slow CI jobs maintainers should run for a PR.
name: PR slow CI

on:
  pull_request_target:
    types: [opened, synchronize, reopened]

jobs:
  get-pr-number:
    name: Get PR number
    uses: ./.github/workflows/get-pr-number.yml

  get-pr-info:
    name: Get PR commit SHA
    needs: get-pr-number
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    uses: ./.github/workflows/get-pr-info.yml
    with:
      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}

  get-jobs:
    name: Get test files to run
    runs-on: ubuntu-22.04
    needs: [get-pr-number, get-pr-info]
    outputs:
      jobs: ${{ steps.get_jobs.outputs.jobs_to_run }}
    steps:
      # Lists three directories of the PR head repository at the PR head
      # commit through the API and exposes each listing as a step output.
      - name: Get repository content
        id: repo_content
        # Same major version as the `send_comment` job below.
        uses: actions/github-script@v7
        with:
          script: |
            const listDirectory = async (path) => {
              const { data } = await github.rest.repos.getContent({
                owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
                repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
                path: path,
                ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
              });
              return data;
            };

            core.setOutput('tests_dir', await listDirectory('tests'));
            core.setOutput('tests_models_dir', await listDirectory('tests/models'));
            core.setOutput('tests_quantization_dir', await listDirectory('tests/quantization'));

      # This checkout to the main branch
      - uses: actions/checkout@v4
        with:
          fetch-depth: "0"

      # The four steps below dump workflow values into files in the workspace.
      # The heredoc delimiter is quoted, so the shell does not expand the content.
      - name: Write pr_files file
        run: |
          cat > pr_files.txt << 'EOF'
          ${{ needs.get-pr-info.outputs.PR_FILES }}
          EOF

      - name: Write tests_dir file
        run: |
          cat > tests_dir.txt << 'EOF'
          ${{ steps.repo_content.outputs.tests_dir }}
          EOF

      - name: Write tests_models_dir file
        run: |
          cat > tests_models_dir.txt << 'EOF'
          ${{ steps.repo_content.outputs.tests_models_dir }}
          EOF

      - name: Write tests_quantization_dir file
        run: |
          cat > tests_quantization_dir.txt << 'EOF'
          ${{ steps.repo_content.outputs.tests_quantization_dir }}
          EOF

      # The last line printed by the script becomes the `jobs_to_run` output.
      - name: Run script to get jobs to run
        id: get_jobs
        run: |
          python utils/get_pr_run_slow_jobs.py | tee output.txt
          echo "jobs_to_run: $(tail -n 1 output.txt)"
          echo "jobs_to_run=$(tail -n 1 output.txt)" >> $GITHUB_OUTPUT

  send_comment:
    # Will delete the previous comment and send a new one if:
    #   - either the content is changed
    #   - or the previous comment is 30 minutes or more old
    name: Send a comment to suggest jobs to run
    if: ${{ needs.get-jobs.outputs.jobs != '' }}
    needs: [get-pr-number, get-jobs]
    permissions:
      pull-requests: write
    runs-on: ubuntu-22.04
    steps:
      - name: Check and update comment if needed
        uses: actions/github-script@v7
        env:
          BODY: "\n\nrun-slow: ${{ needs.get-jobs.outputs.jobs }}"
        with:
          script: |
            const prNumber = ${{ needs.get-pr-number.outputs.PR_NUMBER }};
            const commentPrefix = "**[For maintainers]** Suggested jobs to run (before merge)";
            const thirtyMinutesAgo = new Date(Date.now() - 30 * 60 * 1000); // 30 minutes ago
            const newBody = `${commentPrefix}${process.env.BODY}`;

            // Get all comments on the PR. The listing endpoint is paginated, so walk every
            // page; reading only the first one misses earlier bot comments on busy PRs and
            // leads to duplicates.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              per_page: 100
            });

            // Find existing comments that start with our prefix
            const existingComments = comments.filter(comment =>
              comment.user.login === 'github-actions[bot]' &&
              comment.body.startsWith(commentPrefix)
            );

            let shouldCreateNewComment = true;
            let commentsToDelete = [];

            if (existingComments.length > 0) {
              // Get the most recent comment
              const mostRecentComment = existingComments
                .sort((a, b) => new Date(b.created_at) - new Date(a.created_at))[0];
              const commentDate = new Date(mostRecentComment.created_at);
              const isOld = commentDate < thirtyMinutesAgo;
              const isDifferentContent = mostRecentComment.body !== newBody;

              console.log(`Most recent comment created: ${mostRecentComment.created_at}`);
              console.log(`Is older than 30 minutes: ${isOld}`);
              console.log(`Has different content: ${isDifferentContent}`);

              if (isOld || isDifferentContent) {
                // Delete all existing comments and create new one
                commentsToDelete = existingComments;
                console.log(`Will delete ${commentsToDelete.length} existing comment(s) and create new one`);
              } else {
                // Content is same and comment is recent, skip
                shouldCreateNewComment = false;
                console.log('Comment is recent and content unchanged, skipping update');
              }
            } else {
              console.log('No existing comments found, will create new one');
            }

            // Delete old comments if needed
            for (const comment of commentsToDelete) {
              console.log(`Deleting comment #${comment.id} (created: ${comment.created_at})`);
              await github.rest.issues.deleteComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: comment.id
              });
            }

            // Create new comment if needed
            if (shouldCreateNewComment) {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body: newBody
              });
              console.log('✅ New comment created');
            } else {
              console.log(' No comment update needed');
            }

View File

@ -4,6 +4,18 @@ on:
push:
branches: [ main ]
env:
  OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  HF_HOME: /mnt/cache
  # Environment values reach the process as strings; quoting keeps a generic
  # YAML parser from reading them as booleans or numbers.
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: "1"
jobs:
get_modified_models:
name: "Get all modified files"
@ -14,144 +26,118 @@ jobs:
- name: Check out code
uses: actions/checkout@v4
- name: Get changed files using `actions/github-script`
id: get-changed-files
uses: actions/github-script@v7
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42
with:
script: |
let files = [];
files: src/transformers/models/**
// Only push events are handled; for anything else `files` is left untouched.
if (context.eventName === 'push') {
  const { owner, repo } = context.repo;
  const headSha = context.payload.after;
  const branchName = context.payload.ref.replace('refs/heads/', '');

  // Pick the comparison base: the pushed commit's first parent on `main`,
  // the `main` branch itself for every other branch.
  let baseSha = 'main';
  if (branchName === 'main') {
    console.log('Push to main branch, comparing to parent commit');
    const { data: pushedCommit } = await github.rest.repos.getCommit({ owner, repo, ref: headSha });
    baseSha = pushedCommit.parents[0]?.sha;
    if (!baseSha) {
      throw new Error('No parent commit found for the pushed commit');
    }
  } else {
    console.log(`Push to branch ${branchName}, comparing to main`);
  }

  const { data: comparison } = await github.rest.repos.compareCommits({
    owner,
    repo,
    base: baseSha,
    head: headSha
  });

  // Keep added, modified, and renamed files; every other status is dropped.
  const keptStatuses = new Set(['added', 'modified', 'renamed']);
  files = comparison.files
    .filter(({ status }) => keptStatuses.has(status))
    .map(({ filename }) => filename);
}

// Everything under src/transformers/ counts (not just the models subdirectory).
const filteredFiles = files.filter(name => name.startsWith('src/transformers/'));
const anyChanged = filteredFiles.length > 0;

core.setOutput('changed_files', filteredFiles.join(' '));
core.setOutput('any_changed', anyChanged ? 'true' : 'false');
- name: Parse changed files with Python
if: steps.get-changed-files.outputs.any_changed == 'true'
env:
CHANGED_FILES: ${{ steps.get-changed-files.outputs.changed_files }}
- name: Run step if only the files listed above change
if: steps.changed-files.outputs.any_changed == 'true'
id: set-matrix
env:
ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
run: |
python3 - << 'EOF'
import json
import os
import sys

# `important_files` lives in the `utils` directory, which is not on the path by default.
sys.path.insert(0, 'utils')
from important_files import IMPORTANT_MODELS

MODELS_PREFIX = 'src/transformers/models/'

# Files whose basename alone triggers every important model.
TRANSFORMERS_UTILS_FILES = (
    'modeling_utils.py',
    'modeling_rope_utils.py',
    'modeling_flash_attention_utils.py',
    'modeling_attn_mask_utils.py',
    'cache_utils.py',
    'masking_utils.py',
    'pytorch_utils.py',
)


def is_core_file(path):
    """Return True for generation files and for the listed utility files under src/transformers/."""
    if path.startswith('src/transformers/generation/'):
        return True
    return path.startswith('src/transformers/') and os.path.basename(path) in TRANSFORMERS_UTILS_FILES


print(f"Important models: {IMPORTANT_MODELS}")

# CHANGED_FILES is a whitespace-separated list; only Python files are considered.
python_files = [name for name in os.environ.get('CHANGED_FILES', '').split() if name.endswith('.py')]
print(f"Python files changed: {python_files}")

result_models = set()
for file in python_files:
    if file.startswith(MODELS_PREFIX):
        # A file inside a model folder selects that folder when it is an important model.
        model_dir, separator, _ = file[len(MODELS_PREFIX):].partition('/')
        if separator and model_dir in IMPORTANT_MODELS:
            result_models.add(model_dir)
            print(f"Added model directory: {model_dir}")
    elif is_core_file(file):
        print(f"Found core file: {file} - including all important models")
        result_models.update(IMPORTANT_MODELS)
        break  # every important model is already selected

result_list = sorted(result_models)
print(f"Final model list: {result_list}")

# An empty selection serializes to `[]`, so a single code path covers both cases.
matrix_json = json.dumps(result_list)
print(f"matrix={matrix_json}")
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
    f.write(f"matrix={matrix_json}\n")
EOF
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled.yml
# Build the job matrix: the distinct `models/<name>` folders touched by the
# changed files that are also listed in utils/important_models.txt.
model_arrays=()
for file in $ALL_CHANGED_FILES; do
  # Reduce `.../models/<name>/...` to `models/<name>`.
  model_path="${file#*models/}"
  model_path="models/${model_path%%/*}"
  # Several changed files usually belong to the same model; record each folder
  # once so the matrix has no duplicate entries.
  case " ${model_arrays[*]} " in
    *" $model_path "*) continue ;;
  esac
  if grep -qFx "$model_path" utils/important_models.txt; then
    # Append the folder to the matrix
    model_arrays+=("$model_path")
  fi
done
# `printf` still runs its format once when given no arguments, which would
# yield `[""]` instead of an empty list; handle the empty case explicitly.
if [ "${#model_arrays[@]}" -eq 0 ]; then
  matrix_string=""
else
  matrix_string=$(printf '"%s", ' "${model_arrays[@]}" | sed 's/, $//')
fi
echo "matrix=[$matrix_string]" >> "$GITHUB_OUTPUT"
# Runs the FA2 and integration tests for every entry of the matrix produced by
# `get_modified_models`, uploads the reports and posts each result to Slack.
test_modified_files:
needs: get_modified_models
# NOTE(review): a second `if:` appears a few lines below for this job as shown
# here; confirm only one of the two is meant to be kept.
if: needs.get_modified_models.outputs.matrix != '' && needs.get_modified_models.outputs.matrix != '[]'
name: Slow & FA2 tests
runs-on:
group: aws-g5-4xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
strategy:
fail-fast: false
matrix:
model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}
steps:
- name: Check out code
uses: actions/checkout@v4
# Replaces any installed transformers with an editable install of the
# checked-out code, then adds flash-attn and bitsandbytes.
- name: Install locally transformers & other libs
run: |
apt install sudo
sudo -H pip install --upgrade pip
sudo -H pip uninstall -y transformers
sudo -H pip install -U -e ".[testing]"
MAX_JOBS=4 pip install flash-attn --no-build-isolation
pip install bitsandbytes
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Show installed libraries and their versions
run: pip freeze
# Selects tests carrying the `flash_attn_test` marker in the model's modeling test files.
- name: Run FA2 tests
id: run_fa2_tests
run:
pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
- name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
if: ${{ always() }}
uses: actions/upload-artifact@v4
# NOTE(review): the `with:` block below mixes reusable-workflow style inputs
# (`job`, `docker`, `models`, ...) with the upload-artifact `name`/`path`
# inputs; confirm which keys belong to this step.
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-push"
docker: huggingface/transformers-all-latest-gpu
ci_event: push
report_repo_id: hf-internal-testing/transformers_ci_push
commit_sha: ${{ github.sha }}
models: ${{ needs.get_modified_models.outputs.matrix }}
name: ${{ matrix.model-name }}_fa2_tests
path: /transformers/reports/${{ matrix.model-name }}_fa2_tests
# Reports the conclusion of the `run_fa2_tests` step, whatever it was.
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
status: ${{ steps.run_fa2_tests.conclusion}}
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
# `if: always()` lets the integration tests run even when the FA2 tests failed.
- name: Run integration tests
id: run_integration_tests
if: always()
run:
pytest -rsfE -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
- name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: tests_integration_${{ matrix.model-name }}
path: /transformers/reports/tests_integration_${{ matrix.model-name }}
# Reports the conclusion of the `run_integration_tests` step, whatever it was.
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
status: ${{ steps.run_integration_tests.conclusion}}
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
# Only runs when the workflow is re-run with debug logging enabled.
- name: Tailscale # In order to be able to SSH when a test fails
if: ${{ runner.debug == '1'}}
uses: huggingface/tailscale-action@v1
with:
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
waitForSSH: true
# Calls the benchmark workflow, with the caller's secrets, whenever the
# modified-models matrix is present and has a non-null first entry.
benchmark:
  name: Benchmark workflow
  needs: [get_modified_models]
  if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
  secrets: inherit
  uses: ./.github/workflows/benchmark.yml

View File

@ -1,415 +0,0 @@
# Runs slow CI for a PR when a maintainer comments `run-slow` / `run slow` / `run_slow`.
name: PR comment GitHub CI

on:
  issue_comment:
    types: [created]
    branches-ignore: [main]

# One group per issue and per "is this a run-slow comment" flag; a newer run
# in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow') }}
  cancel-in-progress: true

permissions: read-all

env:
  HF_HOME: /mnt/cache
  # Environment values reach the process as strings; quoting keeps a generic
  # YAML parser from reading them as booleans or numbers.
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  CUDA_VISIBLE_DEVICES: "0,1"
jobs:
# Exposes the PR number as a job output, or an empty string when the comment
# was not made on a pull request.
get-pr-number:
  name: Get PR number
  runs-on: ubuntu-22.04
  # For security: only allow team members to run
  if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
  outputs:
    PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
  steps:
    # PR_NUMBER is the issue number only when the issue payload also has a
    # `pull_request` entry; otherwise it is left empty.
    - name: Get PR number
      shell: bash
      run: |
        pr_number=""
        if [[ -n "${{ github.event.issue.number }}" && -n "${{ github.event.issue.pull_request }}" ]]; then
          pr_number="${{ github.event.issue.number }}"
        fi
        echo "PR_NUMBER=$pr_number" >> "$GITHUB_ENV"

    - name: Check PR number
      shell: bash
      run: echo "${{ env.PR_NUMBER }}"

    # Copies the environment value into the step output read by `outputs` above.
    - name: Set PR number
      id: set_pr_number
      run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
# Resolves the PR head and merge commit SHAs and refuses to continue when the
# merge commit is not strictly older than the triggering comment.
get-sha:
  runs-on: ubuntu-22.04
  needs: get-pr-number
  if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
  outputs:
    PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
    PR_MERGE_SHA: ${{ steps.get_sha.outputs.PR_MERGE_SHA }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: "0"
        ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"

    - name: Get SHA (and verify timestamps against the issue comment date)
      id: get_sha
      env:
        PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
        COMMENT_DATE: ${{ github.event.comment.created_at }}
      run: |
        git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
        git checkout refs/remotes/pull/$PR_NUMBER/head
        echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
        echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
        git fetch origin refs/pull/$PR_NUMBER/merge:refs/remotes/pull/$PR_NUMBER/merge
        git checkout refs/remotes/pull/$PR_NUMBER/merge
        echo "PR_MERGE_SHA: $(git log -1 --format=%H)"
        echo "PR_MERGE_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
        PR_MERGE_COMMIT_TIMESTAMP=$(git log -1 --date=unix --format=%cd)
        echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
        COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
        echo "COMMENT_DATE: $COMMENT_DATE"
        echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
        # Fail closed: continue only when the comment is strictly newer than
        # the merge commit. With an empty or non-numeric value `[` exits with
        # an error, which the negation also treats as "abort" (an unquoted
        # `-le` test would silently let the run through in that case).
        if ! [ "$COMMENT_TIMESTAMP" -gt "$PR_MERGE_COMMIT_TIMESTAMP" ]; then
          echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
          exit 1;
        fi
# use a python script to handle this complex logic
# case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
# case 2: `run-slow model_1, model_2`
get-tests:
  runs-on: ubuntu-22.04
  needs: [get-pr-number, get-sha]
  if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
  outputs:
    models: ${{ steps.models_to_run.outputs.models }}
    quantizations: ${{ steps.models_to_run.outputs.quantizations }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: "0"
        ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"

    # The checked-out merge commit must be the one `get-sha` verified.
    - name: Verify merge commit SHA
      env:
        VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
      run: |
        PR_MERGE_SHA=$(git log -1 --format=%H)
        # Both sides are quoted so that an empty verified SHA compares as
        # "different" and aborts; unquoted, `[` errors out and the `if` is
        # silently skipped.
        if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
          echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
          exit 1;
        fi

    # The last line printed by each script call is stored in the job
    # environment as `models` / `quantizations`.
    - name: Get models to test
      env:
        PR_COMMENT: ${{ github.event.comment.body }}
      run: |
        python -m pip install GitPython
        python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
        echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
        python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
        echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV

    # `models` and `quantizations` were written to GITHUB_ENV by the previous
    # step, so they are read as shell variables here. Splicing them in with
    # `${{ env.* }}` would paste comment-derived text into the script source.
    - name: Show models to test
      id: models_to_run
      run: |
        echo "$models"
        echo "models=$models" >> "$GITHUB_OUTPUT"
        echo "$quantizations"
        echo "quantizations=$quantizations" >> "$GITHUB_OUTPUT"
# Posts a PR comment listing the models and quantizations that will be run.
reply_to_comment:
  name: Reply to the comment
  if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
  needs: [get-pr-number, get-tests]
  permissions:
    pull-requests: write
  runs-on: ubuntu-22.04
  steps:
    - name: Reply to the comment
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        MODELS: ${{ needs.get-tests.outputs.models }}
        BODY: "\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
      # BODY is read as a shell variable: expanding it with `${{ env.BODY }}`
      # would paste its text into a double-quoted shell string, where `$(...)`
      # or backticks in the value would be executed.
      run: |
        gh api \
          --method POST \
          -H "Accept: application/vnd.github+json" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
          -f "body=This comment contains run-slow, running the specified jobs: $BODY ..."
# Marks the PR head commit with a pending `pytest/custom-tests` status that
# links to this workflow run.
create_run:
  name: Create run
  runs-on: ubuntu-22.04
  needs: [get-sha, get-tests, reply_to_comment]
  if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
  permissions:
    statuses: write
  steps:
    - name: Create Run
      id: create_run
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
        # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
        GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
      run: |
        gh api \
          --method POST \
          -H "Accept: application/vnd.github+json" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
          -f "target_url=$GITHUB_RUN_URL" \
          -f "state=pending" \
          -f "description=Slow CI job" \
          -f "context=pytest/custom-tests"
# Runs the test folder of every requested model on both runner groups, against
# the PR merge commit verified by `get-sha`, and uploads the reports.
run_models_gpu:
  name: Run all tests for the model
  if: ${{ needs.get-tests.outputs.models != '[]' }}
  needs: [get-pr-number, get-sha, get-tests, create_run]
  strategy:
    fail-fast: false
    matrix:
      folders: ${{ fromJson(needs.get-tests.outputs.models) }}
      machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
  runs-on:
    group: '${{ matrix.machine_type }}'
  container:
    image: huggingface/transformers-all-latest-gpu
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  steps:
    - name: Echo input and matrix info
      shell: bash
      run: |
        echo "${{ matrix.folders }}"

    - name: Echo folder ${{ matrix.folders }}
      shell: bash
      # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
      # set the artifact folder names (because the character `/` is not allowed).
      run: |
        echo "${{ matrix.folders }}"
        matrix_folders=${{ matrix.folders }}
        matrix_folders=${matrix_folders/'models/'/'models_'}
        echo "$matrix_folders"
        echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

    - name: Checkout to PR merge commit
      working-directory: /transformers
      run: |
        git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
        git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
        git log -1 --format=%H

    # The commit just checked out must be the one `get-sha` verified.
    - name: Verify merge commit SHA
      env:
        VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
      working-directory: /transformers
      run: |
        PR_MERGE_SHA=$(git log -1 --format=%H)
        # Both sides are quoted so that an empty verified SHA compares as
        # "different" and aborts; unquoted, `[` errors out and the `if` is
        # silently skipped.
        if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
          echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
          exit 1;
        fi

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    # Maps the runner group name to the `single-gpu` / `multi-gpu` prefix used
    # in report and artifact names; any other value is kept as is.
    - name: Set `machine_type` for report and artifact names
      working-directory: /transformers
      shell: bash
      run: |
        echo "${{ matrix.machine_type }}"
        if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
          machine_type=single-gpu
        elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
          machine_type=multi-gpu
        else
          machine_type=${{ matrix.machine_type }}
        fi
        echo "$machine_type"
        echo "machine_type=$machine_type" >> $GITHUB_ENV

    - name: Environment
      working-directory: /transformers
      run: |
        python3 utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze

    # CUDA_VISIBLE_DEVICES is set from the helper script's output for this folder.
    - name: Run all tests on GPU
      working-directory: /transformers
      run: |
        export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
        echo $CUDA_VISIBLE_DEVICES
        python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

    - name: Make sure report directory exists
      shell: bash
      run: |
        mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
        echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
        echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

    # The artifact name uses `matrix_folders` (no `/`); the path keeps the
    # original folder name.
    - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
        path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
# Runs every quantization test folder reported by `get-tests` on each machine type and
# uploads the resulting pytest reports as an artifact.
run_quantization_torch_gpu:
  name: Run all tests for a quantization
  if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
  needs: [get-pr-number, get-sha, get-tests, create_run]
  strategy:
    fail-fast: false
    matrix:
      folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
      machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
  runs-on:
    group: '${{ matrix.machine_type }}'
  container:
    image: huggingface/transformers-quantization-latest-gpu
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  steps:
    # Export `matrix_folders` (with `quantization/` replaced by `quantization_`); it is
    # used further down to build the artifact name.
    - name: Echo folder ${{ matrix.folders }}
      shell: bash
      run: |
        echo "${{ matrix.folders }}"
        matrix_folders=${{ matrix.folders }}
        matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
        echo "$matrix_folders"
        echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

    - name: Checkout to PR merge commit
      working-directory: /transformers
      run: |
        git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
        git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
        git log -1 --format=%H

    # Abort when the checked-out commit is not the merge SHA reported by `get-sha`.
    - name: Verify merge commit SHA
      env:
        VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
      working-directory: /transformers
      run: |
        PR_MERGE_SHA=$(git log -1 --format=%H)
        # Quoted operands + explicit empty check: an unquoted empty value would turn the
        # test into a syntax error that `if` silently treats as "false".
        if [ -z "$VERIFIED_PR_MERGE_SHA" ] || [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
          echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!"
          exit 1
        fi

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    # Map the runner group name to `single-gpu` / `multi-gpu` and export it as `machine_type`.
    - name: Set `machine_type` for report and artifact names
      working-directory: /transformers
      shell: bash
      run: |
        echo "${{ matrix.machine_type }}"
        if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
          machine_type=single-gpu
        elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
          machine_type=multi-gpu
        else
          machine_type=${{ matrix.machine_type }}
        fi
        echo "$machine_type"
        echo "machine_type=$machine_type" >> $GITHUB_ENV

    - name: Environment
      working-directory: /transformers
      run: |
        python3 utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze

    - name: Run quantization tests on GPU
      working-directory: /transformers
      run: |
        python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

    # The directory name must match the one used by `--make-reports` above and by the
    # upload step below (`..._run_quantization_torch_gpu_...`); the previous
    # `..._run_quantization_gpu_...` spelling created a directory nothing else reads.
    - name: Make sure report directory exists
      shell: bash
      run: |
        mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
        echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/hello.txt
        echo "${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports"

    # Artifact name uses `env.matrix_folders` (no `/`), the path uses the raw folder name.
    - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
        path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
# Publishes a commit status on the PR head SHA summarising the two GPU test jobs.
update_run_status:
  name: Update Check Run Status
  runs-on: ubuntu-22.04
  # Runs even when the test jobs failed, as long as `create_run` succeeded.
  if: ${{ always() && needs.create_run.result == 'success' }}
  needs:
    - get-sha
    - create_run
    - run_models_gpu
    - run_quantization_torch_gpu
  permissions:
    statuses: write
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
    # "true" only when both test jobs ended as either `skipped` or `success`.
    STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
  steps:
    # Translate STATUS_OK into the `STATUS` env var (`success` / `failure`) for the next step.
    - name: Get `run_models_gpu` job status
      run: |
        echo "${{ needs.run_models_gpu.result }}"
        echo "${{ needs.run_quantization_torch_gpu.result }}"
        echo $STATUS_OK
        if [ "$STATUS_OK" = "true" ]; then
          echo "STATUS=success" >> $GITHUB_ENV
        else
          echo "STATUS=failure" >> $GITHUB_ENV
        fi

    # POST the status to the statuses endpoint of the PR head commit.
    - name: Update PR commit statuses
      run: |
        echo "${{ needs.run_models_gpu.result }}"
        echo "${{ env.STATUS }}"
        gh api \
          --method POST \
          -H "Accept: application/vnd.github+json" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
          -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"

View File

@ -1,56 +1,43 @@
name: Nvidia CI with nightly torch
name: Self-hosted runner (nightly-ci)
on:
repository_dispatch:
# triggered when the daily scheduled Nvidia CI is completed.
# This way, we can compare the results more easily.
workflow_run:
workflows: ["Nvidia CI"]
branches: ["main"]
types: [completed]
schedule:
- cron: "17 2 * * *"
push:
branches:
- run_ci_with_nightly_torch*
# Used for `push` to easily modify the target workflow runs to compare against
env:
prev_workflow_run_id: ""
other_workflow_run_id: ""
- run_nightly_ci*
jobs:
build_nightly_torch_ci_images:
name: Build CI Docker Images with nightly torch
build_nightly_ci_images:
name: Build Nightly CI Docker Images
if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci'))
uses: ./.github/workflows/build-nightly-ci-docker-images.yml
with:
job: latest-with-torch-nightly-docker
secrets: inherit
setup:
name: Setup
runs-on: ubuntu-22.04
steps:
- name: Setup
run: |
mkdir "setup_values"
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: setup_values
path: setup_values
model-ci:
name: Model CI
needs: build_nightly_torch_ci_images
needs: [build_nightly_ci_images]
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-past-future"
runner: ci
docker: huggingface/transformers-all-latest-torch-nightly-gpu
ci_event: Nightly CI
report_repo_id: hf-internal-testing/transformers_daily_ci_with_torch_nightly
commit_sha: ${{ github.event.workflow_run.head_sha || github.sha }}
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
needs: [build_nightly_ci_images]
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-past-future"
runner: ci
# test deepspeed nightly build with the latest release torch
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
ci_event: Nightly CI
working-directory-prefix: /workspace
secrets: inherit

View File

@ -21,6 +21,39 @@ jobs:
echo "$(python3 -c 'print(int(${{ github.run_number }}) % 10)')"
echo "run_number=$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" >> $GITHUB_OUTPUT
# Past CI against PyTorch 1.13; selected when the `run_number` output of `get_number` is 0.
run_past_ci_pytorch_1-13:
  name: PyTorch 1.13
  needs: get_number
  if: >-
    needs.get_number.outputs.run_number == 0
    && (cancelled() != true)
    && ((github.event_name == 'schedule')
    || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
  uses: ./.github/workflows/self-past-caller.yml
  with:
    framework: pytorch
    # Quoted so the version stays a string.
    version: "1.13"
    sha: ${{ github.sha }}
  secrets: inherit
# Past CI against PyTorch 1.12; selected when the `run_number` output of `get_number` is 1.
run_past_ci_pytorch_1-12:
  name: PyTorch 1.12
  needs: get_number
  if: >-
    needs.get_number.outputs.run_number == 1
    && (cancelled() != true)
    && ((github.event_name == 'schedule')
    || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
  uses: ./.github/workflows/self-past-caller.yml
  with:
    framework: pytorch
    # Quoted so the version stays a string.
    version: "1.12"
    sha: ${{ github.sha }}
  secrets: inherit
# Past CI against PyTorch 1.11; selected when the `run_number` output of `get_number` is 2.
run_past_ci_pytorch_1-11:
  name: PyTorch 1.11
  needs: get_number
  if: >-
    needs.get_number.outputs.run_number == 2
    && (cancelled() != true)
    && ((github.event_name == 'schedule')
    || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
  uses: ./.github/workflows/self-past-caller.yml
  with:
    framework: pytorch
    # Quoted so the version stays a string.
    version: "1.11"
    sha: ${{ github.sha }}
  secrets: inherit
run_past_ci_tensorflow_2-11:
name: TensorFlow 2.11
needs: get_number

151
.github/workflows/self-pr-slow-ci.yml vendored Normal file
View File

@ -0,0 +1,151 @@
name: PR slow CI

# Runs for pull requests that touch a modeling file or a test file.
on:
  pull_request:
    paths:
      - "src/transformers/models/*/modeling_*.py"
      - "tests/**/test_*.py"

# One active run per PR branch (falls back to the run id); a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: yes
  # Gated repositories still require accepting the information-sharing terms on the Hub
  # repo page before access is granted. This token belongs to the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: "1"
  CUDA_VISIBLE_DEVICES: 0,1
jobs:
# Determines which model test folders the slow CI should run and exposes them as the
# `models` job output (consumed through `fromJson` by the test job).
find_models_to_run:
  runs-on: ubuntu-22.04
  name: Find models to run slow tests
  # Triggered only if the required label `run-slow` is added
  if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
  outputs:
    models: ${{ steps.models_to_run.outputs.models }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: "0"
        ref: ${{ github.event.pull_request.head.sha }}

    # Store the subject line of the checked-out commit in the `commit_message` env var.
    - name: Get commit message
      run: |
        echo "commit_message=$(git show -s --format=%s)" >> $GITHUB_ENV

    # The commit subject is written by the PR author, so it must never be pasted into the
    # script text via `${{ env.commit_message }}` (that allows shell injection and breaks on
    # quotes). Values written to $GITHUB_ENV are available as ordinary environment
    # variables in later steps, so read it as "$commit_message" instead.
    - name: Get models to run slow tests
      run: |
        echo "$commit_message"
        python -m pip install GitPython
        python utils/pr_slow_ci_models.py --commit_message "$commit_message" | tee output.txt
        echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV

    # Same reasoning for `models`: it is derived from the commit message / PR content, so
    # it is read from the environment rather than interpolated into the script.
    - name: Models to run slow tests
      id: models_to_run
      run: |
        echo "$models"
        echo "models=$models" >> $GITHUB_OUTPUT
# Runs the tests of every folder listed in the `models` output of `find_models_to_run`,
# once per machine type of the matrix, and uploads the pytest reports as an artifact.
run_models_gpu:
name: Run all tests for the model
# Runs only if `find_models_to_run` ran (i.e. the label `run-slow` is present) and produced a
# non-empty list of models (either from a new model PR or from the commit message).
if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
needs: find_models_to_run
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo input and matrix info
shell: bash
run: |
echo "${{ matrix.folders }}"
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
# Fetches the PR *head* ref into a local branch and checks it out. Note that the local
# branch is named `pull/<number>/merge` even though the fetched ref is `pull/<number>/head`.
- name: Update clone
working-directory: /transformers
run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge
# Besides reinstalling transformers in editable mode, this also upgrades torch, torchaudio and torchvision.
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . && python3 -m pip install --upgrade torch torchaudio torchvision
- name: NVIDIA-SMI
run: |
nvidia-smi
# Maps the runner group name to `single-gpu` / `multi-gpu` (any other value is kept as is)
# and exports it as the `machine_type` env var for the following steps.
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
# CUDA_VISIBLE_DEVICES is overridden for this step with the output of
# `utils/set_cuda_devices_for_ci.py` for the current test folder.
- name: Run all tests on GPU
working-directory: /transformers
run: |
export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
echo $CUDA_VISIBLE_DEVICES
python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
# Only on failure: print the short failure report; errors of this step itself are ignored.
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
# Creates the report directory and a placeholder `hello.txt` in it
# (presumably so that the upload step below always finds a path - TODO confirm).
- name: Make sure report directory exists
shell: bash
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
# The artifact name uses `env.matrix_folders` (`/` replaced), the path uses the raw `matrix.folders`.
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

View File

@ -1,10 +1,10 @@
name: Self-hosted runner (AMD mi210 CI caller)
on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_push_ci_caller*

View File

@ -1,10 +1,10 @@
name: Self-hosted runner (AMD mi250 CI caller)
on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_push_ci_caller*

View File

@ -0,0 +1,25 @@
name: Self-hosted runner (AMD mi300 CI caller)

on:
  # Started after the "Self-hosted runner (push-caller)" workflow completes on `main` ...
  workflow_run:
    workflows:
      - "Self-hosted runner (push-caller)"
    branches:
      - "main"
    types:
      - completed
  # ... or by a push to a `run_amd_push_ci_caller*` branch that touches one of these paths.
  push:
    branches:
      - "run_amd_push_ci_caller*"
    paths:
      - "src/**"
      - "tests/**"
      - ".github/**"
      - "templates/**"
      - "utils/**"

jobs:
  # Delegates to the reusable AMD push workflow with the mi300 GPU flavor.
  run_amd_ci:
    name: AMD mi300
    # NOTE(review): the `mi300-ci` branch prefix is accepted here, but the `push` trigger
    # above only lists `run_amd_push_ci_caller*` - confirm whether that is intended.
    if: >-
      (cancelled() != true)
      && ((github.event_name == 'workflow_run')
      || ((github.event_name == 'push')
      && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci'))))
    uses: ./.github/workflows/self-push-amd.yml
    with:
      gpu_flavor: mi300
    secrets: inherit

View File

@ -14,6 +14,7 @@ env:
MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 60
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
jobs:

View File

@ -25,7 +25,7 @@ jobs:
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
uses: tj-actions/changed-files@v41
- name: Was setup changed
id: was_changed

View File

@ -24,6 +24,7 @@ env:
MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 60
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
@ -31,12 +32,12 @@ jobs:
name: Setup
strategy:
matrix:
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
test_map: ${{ steps.set-matrix.outputs.test_map }}
@ -131,12 +132,12 @@ jobs:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [aws-g5-4xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
@ -169,9 +170,9 @@ jobs:
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
@ -244,7 +245,7 @@ jobs:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [aws-g5-12xlarge-cache]
machine_type: [aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
@ -282,9 +283,9 @@ jobs:
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
@ -357,12 +358,12 @@ jobs:
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-4xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
@ -395,9 +396,9 @@ jobs:
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
@ -467,7 +468,7 @@ jobs:
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-12xlarge-cache]
machine_type: [aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
@ -505,9 +506,9 @@ jobs:
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}

View File

@ -0,0 +1,55 @@
name: Self-hosted runner (AMD mi210 scheduled CI caller)

on:
  workflow_run:
    workflows:
      - "Self-hosted runner (AMD scheduled CI caller)"
    branches:
      - "main"
    types:
      - completed
  push:
    branches:
      - "run_amd_scheduled_ci_caller*"

# Every job calls the same reusable workflow on the `mi210` runner; the jobs differ in the
# `job` input and, for DeepSpeed, in the docker image.
jobs:
  model-ci:
    name: Model CI
    uses: ./.github/workflows/self-scheduled-amd.yml
    secrets: inherit
    with:
      job: run_models_gpu
      slack_report_channel: "#transformers-ci-daily-amd"
      runner: mi210
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: "Scheduled CI (AMD) - mi210"

  torch-pipeline:
    name: Torch pipeline CI
    uses: ./.github/workflows/self-scheduled-amd.yml
    secrets: inherit
    with:
      job: run_pipelines_torch_gpu
      slack_report_channel: "#transformers-ci-daily-amd"
      runner: mi210
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: "Scheduled CI (AMD) - mi210"

  example-ci:
    name: Example CI
    uses: ./.github/workflows/self-scheduled-amd.yml
    secrets: inherit
    with:
      job: run_examples_gpu
      slack_report_channel: "#transformers-ci-daily-amd"
      runner: mi210
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: "Scheduled CI (AMD) - mi210"

  deepspeed-ci:
    name: DeepSpeed CI
    uses: ./.github/workflows/self-scheduled-amd.yml
    secrets: inherit
    with:
      job: run_torch_cuda_extensions_gpu
      slack_report_channel: "#transformers-ci-daily-amd"
      runner: mi210
      # DeepSpeed uses its own image.
      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
      ci_event: "Scheduled CI (AMD) - mi210"

View File

@ -12,48 +12,44 @@ on:
jobs:
model-ci:
name: Model CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
report_repo_id: optimum-amd/transformers_daily_ci
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
report_repo_id: optimum-amd/transformers_daily_ci
secrets: inherit
example-ci:
name: Example CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
report_repo_id: optimum-amd/transformers_daily_ci
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
report_repo_id: optimum-amd/transformers_daily_ci
secrets: inherit

View File

@ -1,67 +0,0 @@
name: Self-hosted runner scale set (AMD mi325 scheduled CI caller)
# Note: for every job in this workflow, the final name of the runner scale set is determined by the called workflow,
# i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml.
# For example: 1gpu scale set: amd-mi325-ci-1gpu
#              2gpu scale set: amd-mi325-ci-2gpu
on:
workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_scheduled_ci_caller*
jobs:
model-ci:
name: Model CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
job: run_models_gpu
slack_report_channel: "#amd-hf-ci"
runner_group: amd-mi325
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi325
report_repo_id: optimum-amd/transformers_daily_ci
env_file: /etc/podinfo/gha-gpu-isolation-settings
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#amd-hf-ci"
runner_group: amd-mi325
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi325
report_repo_id: optimum-amd/transformers_daily_ci
env_file: /etc/podinfo/gha-gpu-isolation-settings
secrets: inherit
example-ci:
name: Example CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
job: run_examples_gpu
slack_report_channel: "#amd-hf-ci"
runner_group: amd-mi325
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi325
report_repo_id: optimum-amd/transformers_daily_ci
env_file: /etc/podinfo/gha-gpu-isolation-settings
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#amd-hf-ci"
runner_group: amd-mi325
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi325
report_repo_id: optimum-amd/transformers_daily_ci
env_file: /etc/podinfo/gha-gpu-isolation-settings
secrets: inherit

View File

@ -1,63 +0,0 @@
name: Self-hosted runner scale set (AMD mi355 scheduled CI caller)
# Note: for every job in this workflow, the final name of the runner scale set is determined by the called workflow,
# i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml.
# For example: 1gpu scale set: amd-mi355-ci-1gpu
#              2gpu scale set: amd-mi355-ci-2gpu
on:
workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_scheduled_ci_caller*
jobs:
model-ci:
name: Model CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
job: run_models_gpu
slack_report_channel: "#amd-hf-ci"
runner_group: hfc-amd-mi355
docker: huggingface/testing-rocm7.0-preview
ci_event: Scheduled CI (AMD) - mi355
report_repo_id: hf-transformers-bot/transformers-ci-dummy
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#amd-hf-ci"
runner_group: hfc-amd-mi355
docker: huggingface/testing-rocm7.0-preview
ci_event: Scheduled CI (AMD) - mi355
report_repo_id: hf-transformers-bot/transformers-ci-dummy
secrets: inherit
example-ci:
name: Example CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
job: run_examples_gpu
slack_report_channel: "#amd-hf-ci"
runner_group: hfc-amd-mi355
docker: huggingface/testing-rocm7.0-preview
ci_event: Scheduled CI (AMD) - mi355
report_repo_id: hf-transformers-bot/transformers-ci-dummy
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#amd-hf-ci"
runner_group: hfc-amd-mi355
docker: huggingface/testing-rocm7.0-preview
ci_event: Scheduled CI (AMD) - mi355
report_repo_id: hf-transformers-bot/transformers-ci-dummy
secrets: inherit

349
.github/workflows/self-scheduled-amd.yml vendored Normal file
View File

@ -0,0 +1,349 @@
name: Self-hosted runner (scheduled-amd)
# Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the
# CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes
# us towards the limit of allowed jobs on GitHub Actions.
# Reusable workflow: only reachable through `workflow_call`. All five inputs are required strings.
on:
workflow_call:
inputs:
# Selects which test job runs; every test job below is gated on this value.
job:
required: true
type: string
# Forwarded unchanged to slack-report.yml by `send_results`.
slack_report_channel:
required: true
type: string
# Used as one of the `runs-on` labels of the self-hosted jobs.
runner:
required: true
type: string
# Container image for the pipeline / examples / deepspeed jobs (also forwarded to model_jobs_amd.yml).
docker:
required: true
type: string
# Forwarded unchanged to slack-report.yml by `send_results`.
ci_event:
required: true
type: string
# Workflow-level environment shared by every job.
env:
# Same path as the container-side target of the `-v ...:/mnt/cache/` mount used by the jobs below.
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
# Number of slices the model test folders are split into (read by the `setup` job).
NUM_SLICES: 2
# Important note: only one test job (run_models_gpu, run_pipelines_torch_gpu, run_examples_gpu or
# run_torch_cuda_extensions_gpu) runs per call, selected through `inputs.job`; the others are skipped.
# This is done so that we avoid parallelizing the scheduled tests, to leave available
# runners for the push CI that is running on the same machine.
jobs:
# Runs on a GitHub-hosted runner and asks utils/check_self_hosted_runner.py about a fixed list of
# self-hosted runners. The list is hard-coded and does not depend on `inputs.runner`.
check_runner_status:
name: Check Runner Status
runs-on: ubuntu-22.04
steps:
- name: Checkout transformers
uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Check Runner Status
run: python utils/check_self_hosted_runner.py --target_runners hf-amd-mi210-ci-1gpu-1,hf-amd-mi250-ci-1gpu-1,hf-amd-mi300-ci-1gpu-1 --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
# Smoke test of the target self-hosted runners: starts the AMD container on a single-gpu and a
# multi-gpu runner and prints ROCm device information. All test jobs wait on this job, directly or via `setup`.
check_runners:
name: Check Runners
needs: check_runner_status
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
# The image is fixed here; `inputs.docker` is not used by this job.
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
# Computes the model-test matrix (`folder_slices`, `slice_ids`) consumed by `run_models_gpu`.
# Skipped unless `inputs.job` is `run_models_gpu`.
setup:
if: contains(fromJSON('["run_models_gpu"]'), inputs.job)
name: Setup
needs: check_runners
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
# Only these two outputs are declared; both come from the `set-matrix` step.
outputs:
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
steps:
# The steps work in /transformers inside the container and move that clone to the triggering commit.
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ github.sha }}
- name: Cleanup
working-directory: /transformers
run: |
rm -rf tests/__pycache__
rm -rf tests/models/__pycache__
rm -rf reports
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
# Writes `folder_slices` (output of utils/split_model_tests.py) and `slice_ids` (list(range(NUM_SLICES))).
- id: set-matrix
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
# Model tests: one call to model_jobs_amd.yml per (machine_type, slice_id) pair, run one at a time.
# NOTE(review): the display name says "Single GPU tests" but the matrix also covers `multi-gpu`.
run_models_gpu:
if: ${{ inputs.job == 'run_models_gpu' }}
name: Single GPU tests
needs: setup
strategy:
max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
# `slice_ids` is emitted by `setup` as a printed Python list and parsed here with fromJSON.
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs_amd.yml
with:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }}
runner: ${{ inputs.runner }}
docker: ${{ inputs.docker }}
secrets: inherit
# PyTorch pipeline tests on single-gpu and multi-gpu runners, inside the `inputs.docker` image.
# Reports are written under /transformers/reports/<machine_type>_run_pipelines_torch_gpu_test_reports.
run_pipelines_torch_gpu:
if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
name: PyTorch pipelines
needs: check_runners
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
# Only after a failure; `continue-on-error` keeps a missing failures_short.txt from adding another failure.
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
# Uploaded on every outcome (`always()`).
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
# Example-script tests (examples/pytorch). The matrix contains `single-gpu` only.
# Reports are written under /transformers/reports/<machine_type>_run_examples_gpu_test_reports.
run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }}
name: Examples directory
needs: check_runners
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
# Installs the example-specific requirements first, then runs the example tests.
- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports
# DeepSpeed / extended tests (tests/deepspeed and tests/extended) on single-gpu and multi-gpu runners.
# Reports are written under /transformers/reports/<machine_type>_run_torch_cuda_extensions_gpu_test_reports.
run_torch_cuda_extensions_gpu:
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
name: Torch ROCm deepspeed tests
needs: check_runners
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
# Final reporting job: waits on every other job and calls slack-report.yml.
# `if: always()` makes it run even when upstream jobs failed or were skipped.
send_results:
name: Slack Report
needs: [
check_runner_status,
check_runners,
setup,
run_models_gpu,
run_pipelines_torch_gpu,
run_examples_gpu,
run_torch_cuda_extensions_gpu
]
if: ${{ always() }}
uses: ./.github/workflows/slack-report.yml
with:
job: ${{ inputs.job }}
# This would be `skipped` if `setup` is skipped.
setup_status: ${{ needs.setup.result }}
slack_report_channel: ${{ inputs.slack_report_channel }}
# This would be an empty string if `setup` is skipped.
folder_slices: ${{ needs.setup.outputs.folder_slices }}
# NOTE(review): `setup` in this file declares only `folder_slices` and `slice_ids`, so this
# expression has no matching output to read; presumably it evaluates to an empty string — confirm.
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
ci_event: ${{ inputs.ci_event }}
secrets: inherit

View File

@ -1,4 +1,5 @@
name: Nvidia CI
name: Self-hosted runner (scheduled)
on:
repository_dispatch:
@ -6,55 +7,18 @@ on:
- cron: "17 2 * * *"
push:
branches:
- run_nvidia_ci*
workflow_dispatch:
inputs:
prev_workflow_run_id:
description: 'previous workflow run id to compare'
type: string
required: false
default: ""
other_workflow_run_id:
description: 'other workflow run id to compare'
type: string
required: false
default: ""
# Used for `push` to easily modify the target workflow runs to compare against
env:
prev_workflow_run_id: ""
other_workflow_run_id: ""
- run_scheduled_ci*
jobs:
setup:
name: Setup
runs-on: ubuntu-22.04
steps:
- name: Setup
run: |
mkdir "setup_values"
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: setup_values
path: setup_values
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-models"
runner: daily-ci
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
runner_type: "a10"
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
torch-pipeline:
@ -63,10 +27,20 @@ jobs:
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
runner: daily-ci
docker: huggingface/transformers-pytorch-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
tf-pipeline:
name: TF pipeline CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_pipelines_tf_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-tf"
runner: daily-ci
docker: huggingface/transformers-tensorflow-gpu
ci_event: Daily CI
secrets: inherit
example-ci:
@ -75,23 +49,9 @@ jobs:
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-examples"
runner: daily-ci
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
trainer-fsdp-ci:
name: Trainer/FSDP CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_trainer_and_fsdp_gpu
slack_report_channel: "#transformers-ci-daily-training"
docker: huggingface/transformers-all-latest-gpu
runner_type: "a10"
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
deepspeed-ci:
@ -99,12 +59,11 @@ jobs:
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-training"
slack_report_channel: "#transformers-ci-daily-deepspeed"
runner: daily-ci
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
ci_event: Daily CI
working-directory-prefix: /workspace
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
quantization-ci:
@ -113,8 +72,7 @@ jobs:
with:
job: run_quantization_torch_gpu
slack_report_channel: "#transformers-ci-daily-quantization"
runner: daily-ci
docker: huggingface/transformers-quantization-latest-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit

View File

@ -1,341 +0,0 @@
name: Self-hosted runner (scheduled-intel-gaudi)
# Reusable workflow: only reachable through `workflow_call`. All five inputs are required strings.
on:
workflow_call:
inputs:
# Selects which test job runs; every test job below is gated on this value.
job:
required: true
type: string
slack_report_channel:
required: true
type: string
# Prefix of the runner group name; jobs append `-<machine_type>` (e.g. `-1gaudi`) to it.
runner_scale_set:
required: true
type: string
ci_event:
required: true
type: string
# Forwarded unchanged to slack-report.yml by `send_results`.
report_repo_id:
required: true
type: string
# Workflow-level environment shared by every job.
env:
# Number of slices the model test folders are split into (read by the `setup` job).
NUM_SLICES: 2
RUN_SLOW: yes
PT_HPU_LAZY_MODE: 0
TRANSFORMERS_IS_CI: yes
PT_ENABLE_INT64_SUPPORT: 1
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
# Same path as the cache directory mounted into the Gaudi containers below.
HF_HOME: /mnt/cache/.cache/huggingface
jobs:
# Computes the test matrix (`slice_ids`, `folder_slices`) consumed by the `run_models_gpu` and
# `run_trainer_and_fsdp_gpu` jobs. Skipped for every other value of `inputs.job`.
setup:
  if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
  name: Setup
  runs-on: ubuntu-latest
  outputs:
    slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
    folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
    # `quantization_matrix` is written by the `set-matrix-quantization` step, so it must be read
    # from that step id; the `set-matrix` step never writes it.
    # NOTE(review): the job-level `if` above does not include `run_quantization_torch_gpu`, so the
    # quantization step is currently never reached — confirm whether that is intended.
    quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    # Model CI: slices come from utils/split_model_tests.py. Trainer/FSDP CI: two fixed slices.
    - id: set-matrix
      if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
      name: Identify models to test
      working-directory: tests
      run: |
        if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
          echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
          echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
        elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
          echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
          echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
        fi
    # Lists the sub-directories of tests/quantization as `quantization/<name>` entries, sorted.
    - id: set-matrix-quantization
      if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
      name: Identify quantization method to test
      working-directory: tests
      run: |
        echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
# Model tests: one call to model_jobs_intel_gaudi.yml per (machine_type, slice_id) pair.
run_models_gpu:
if: ${{ inputs.job == 'run_models_gpu' }}
name: " "
needs: setup
strategy:
fail-fast: false
matrix:
machine_type: [1gaudi, 2gaudi]
# `slice_ids` is emitted by `setup` as a printed list and parsed here with fromJSON.
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs_intel_gaudi.yml
with:
slice_id: ${{ matrix.slice_id }}
machine_type: ${{ matrix.machine_type }}
folder_slices: ${{ needs.setup.outputs.folder_slices }}
# Runner name is the scale-set prefix plus the machine type, e.g. `<runner_scale_set>-1gaudi`.
runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
secrets: inherit
# Trainer / FSDP tests: same reusable workflow as the model tests, with the slices `setup` emits for
# this job and a distinct `report_name_prefix`.
run_trainer_and_fsdp_gpu:
if: ${{ inputs.job == 'run_trainer_and_fsdp_gpu' }}
name: " "
needs: setup
strategy:
fail-fast: false
matrix:
machine_type: [1gaudi, 2gaudi]
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs_intel_gaudi.yml
with:
slice_id: ${{ matrix.slice_id }}
machine_type: ${{ matrix.machine_type }}
folder_slices: ${{ needs.setup.outputs.folder_slices }}
runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
report_name_prefix: run_trainer_and_fsdp_gpu
secrets: inherit
# Pipeline tests on 1-Gaudi and 2-Gaudi runner groups. This job declares no `needs`.
# Reports are written under reports/<machine_type>_run_pipelines_torch_gpu_test_reports in the checkout.
run_pipelines_torch_gpu:
if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
name: Pipelines
strategy:
fail-fast: false
matrix:
machine_type: [1gaudi, 2gaudi]
runs-on:
group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
# Fixed Habana PyTorch image; the options value continues over the following lines.
container:
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana
-v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
--env OMPI_MCA_btl_vader_single_copy_mechanism=none
--env HABANA_VISIBLE_DEVICES
--env HABANA_VISIBLE_MODULES
--cap-add=sys_nice
--shm-size=64G
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install dependencies
run: |
pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
- name: HL-SMI
run: |
hl-smi
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
- name: Environment
run: python3 utils/print_env.py
- name: Show installed libraries and their versions
run: pip freeze
# Maps 1gaudi -> single-gpu and 2gaudi -> multi-gpu and exports the result through GITHUB_ENV,
# so the later steps read it as `env.machine_type`.
- name: Set `machine_type` for report and artifact names
shell: bash
run: |
if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run all pipeline tests on Intel Gaudi
run: |
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
# Only after a failure; `continue-on-error` keeps a missing failures_short.txt from adding another failure.
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: |
cat reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
path: reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
# Example-script tests (examples/pytorch). The matrix contains `1gaudi` only.
# Reports are written under reports/<machine_type>_run_examples_gpu_test_reports in the checkout.
run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }}
name: Examples directory
strategy:
fail-fast: false
matrix:
machine_type: [1gaudi]
runs-on:
group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
container:
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana
-v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
--env OMPI_MCA_btl_vader_single_copy_mechanism=none
--env HABANA_VISIBLE_DEVICES
--env HABANA_VISIBLE_MODULES
--cap-add=sys_nice
--shm-size=64G
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install dependencies
run: |
pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
- name: HL-SMI
run: |
hl-smi
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
- name: Environment
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
run: |
pip freeze
# Maps 1gaudi -> single-gpu and 2gaudi -> multi-gpu and exports the result through GITHUB_ENV,
# so the later steps read it as `env.machine_type`.
- name: Set `machine_type` for report and artifact names
shell: bash
run: |
if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "machine_type=$machine_type" >> $GITHUB_ENV
# Installs the example-specific requirements first, then runs the example tests.
- name: Run examples tests on Intel Gaudi
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: |
cat reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_examples_gpu_test_reports
path: reports/${{ env.machine_type }}_run_examples_gpu_test_reports
# DeepSpeed tests (tests/deepspeed) on 1-Gaudi and 2-Gaudi runner groups.
# Reports are written under reports/<machine_type>_run_torch_cuda_extensions_gpu_test_reports in the checkout.
run_torch_cuda_extensions_gpu:
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
name: Intel Gaudi deepspeed tests
strategy:
fail-fast: false
matrix:
machine_type: [1gaudi, 2gaudi]
runs-on:
group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
container:
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana
-v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
--env OMPI_MCA_btl_vader_single_copy_mechanism=none
--env HABANA_VISIBLE_DEVICES
--env HABANA_VISIBLE_MODULES
--cap-add=sys_nice
--shm-size=64G
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
# Besides the usual test dependencies, installs the HabanaAI DeepSpeed fork at ref 1.20.0.
# NOTE(review): the container image path above says 1.21.1 — confirm the 1.20.0 ref is intended.
- name: Install dependencies
run: |
pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0
- name: HL-SMI
run: |
hl-smi
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
- name: Environment
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
run: |
pip freeze
# Maps 1gaudi -> single-gpu and 2gaudi -> multi-gpu and exports the result through GITHUB_ENV,
# so the later steps read it as `env.machine_type`.
- name: Set `machine_type` for report and artifact names
shell: bash
run: |
if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run all deepspeed tests on intel Gaudi
run: |
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: |
cat reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
# Final reporting job: waits on `setup` and every test job, then calls slack-report.yml.
# `if: always()` makes it run even when upstream jobs failed or were skipped.
send_results:
name: Slack Report
needs:
[
setup,
run_models_gpu,
run_examples_gpu,
run_torch_cuda_extensions_gpu,
run_pipelines_torch_gpu,
run_trainer_and_fsdp_gpu,
]
if: ${{ always() }}
uses: ./.github/workflows/slack-report.yml
with:
job: ${{ inputs.job }}
# `setup` only runs for the model and trainer/FSDP jobs, so this can be `skipped`.
setup_status: ${{ needs.setup.result }}
slack_report_channel: ${{ inputs.slack_report_channel }}
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
folder_slices: ${{ needs.setup.outputs.folder_slices }}
report_repo_id: ${{ inputs.report_repo_id }}
ci_event: ${{ inputs.ci_event }}
secrets: inherit

View File

@ -1,67 +0,0 @@
name: Self-hosted runner (Intel Gaudi3 scheduled CI caller)
# Triggers: repository dispatch, manual dispatch, and a daily schedule (cron "17 2 * * *").
on:
repository_dispatch:
workflow_dispatch:
schedule:
- cron: "17 2 * * *"
# Every job below calls the local reusable workflow self-scheduled-intel-gaudi.yml and differs only
# in the `job` input; CI event label, runner scale set, Slack channel and report repo are identical.
jobs:
# Model tests.
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
with:
job: run_models_gpu
ci_event: Scheduled CI (Intel) - Gaudi3
runner_scale_set: itac-bm-emr-gaudi3-dell
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
secrets: inherit
# Pipeline tests.
pipeline-ci:
name: Pipeline CI
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
with:
job: run_pipelines_torch_gpu
ci_event: Scheduled CI (Intel) - Gaudi3
runner_scale_set: itac-bm-emr-gaudi3-dell
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
secrets: inherit
# Example-script tests.
example-ci:
name: Example CI
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
with:
job: run_examples_gpu
ci_event: Scheduled CI (Intel) - Gaudi3
runner_scale_set: itac-bm-emr-gaudi3-dell
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
secrets: inherit
# DeepSpeed tests; the reusable workflow's job key for these is `run_torch_cuda_extensions_gpu`.
deepspeed-ci:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
with:
job: run_torch_cuda_extensions_gpu
ci_event: Scheduled CI (Intel) - Gaudi3
runner_scale_set: itac-bm-emr-gaudi3-dell
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
secrets: inherit
# Trainer and FSDP tests.
trainer-fsdp-ci:
name: Trainer/FSDP CI
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
with:
job: run_trainer_and_fsdp_gpu
ci_event: Scheduled CI (Intel) - Gaudi3
runner_scale_set: itac-bm-emr-gaudi3-dell
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
secrets: inherit

View File

@ -1,4 +1,4 @@
name: Nvidia CI (job definitions)
name: Self-hosted runner (scheduled)
# Note that each job's dependencies go into a corresponding docker file.
#
@ -15,6 +15,9 @@ on:
slack_report_channel:
required: true
type: string
runner:
required: true
type: string
docker:
required: true
type: string
@ -25,19 +28,6 @@ on:
default: ''
required: false
type: string
report_repo_id:
required: true
type: string
commit_sha:
required: false
type: string
runner_type:
required: false
type: string
models:
default: ""
required: false
type: string
env:
HF_HOME: /mnt/cache
@ -48,22 +38,24 @@ env:
# For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
NUM_SLICES: 2
jobs:
setup:
if: contains(fromJSON('["run_models_gpu", "run_quantization_torch_gpu"]'), inputs.job)
name: Setup
if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu", "run_quantization_torch_gpu"]'), inputs.job)
strategy:
matrix:
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
@ -72,7 +64,7 @@ jobs:
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
git fetch && git checkout ${{ github.sha }}
- name: Cleanup
working-directory: /transformers
@ -86,17 +78,12 @@ jobs:
run: pip freeze
- id: set-matrix
if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
if: ${{ inputs.job == 'run_models_gpu' }}
name: Identify models to test
working-directory: /transformers/tests
run: |
if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
echo "folder_slices=$(python3 ../utils/split_model_tests.py --models '${{ inputs.models }}' --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
fi
- id: set-matrix-quantization
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
@ -116,38 +103,15 @@ jobs:
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs.yml
with:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }}
runner: ${{ inputs.runner }}
docker: ${{ inputs.docker }}
commit_sha: ${{ inputs.commit_sha || github.sha }}
runner_type: ${{ inputs.runner_type }}
report_repo_id: ${{ inputs.report_repo_id }}
secrets: inherit
run_trainer_and_fsdp_gpu:
if: ${{ inputs.job == 'run_trainer_and_fsdp_gpu' }}
name: " "
needs: setup
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
slice_id: [0, 1]
uses: ./.github/workflows/model_jobs.yml
with:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }}
docker: ${{ inputs.docker }}
commit_sha: ${{ inputs.commit_sha || github.sha }}
runner_type: ${{ inputs.runner_type }}
report_repo_id: ${{ inputs.report_repo_id }}
report_name_prefix: run_trainer_and_fsdp_gpu
secrets: inherit
run_pipelines_torch_gpu:
@ -156,7 +120,7 @@ jobs:
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
@ -165,7 +129,7 @@ jobs:
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
@ -190,9 +154,9 @@ jobs:
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
@ -218,22 +182,23 @@ jobs:
name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }}
name: Examples directory
run_pipelines_tf_gpu:
if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
name: TensorFlow pipelines
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-4xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
image: huggingface/transformers-tensorflow-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
run: |
git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
@ -258,9 +223,77 @@ jobs:
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines
- name: Failure short reports
if: ${{ always() }}
run: |
cat /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }}
name: Examples directory
strategy:
fail-fast: false
matrix:
machine_type: [aws-g4dn-2xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
@ -293,7 +326,7 @@ jobs:
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
@ -302,7 +335,7 @@ jobs:
steps:
- name: Update clone
working-directory: ${{ inputs.working-directory-prefix }}/transformers
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: ${{ inputs.working-directory-prefix }}/transformers
@ -333,7 +366,7 @@ jobs:
run: |
python3 -m pip uninstall -y deepspeed
rm -rf DeepSpeed
git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build
git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI
@ -350,14 +383,14 @@ jobs:
run: pip freeze
- name: Set `machine_type` for report and artifact names
working-directory: ${{ inputs.working-directory-prefix }}/transformers
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
@ -392,7 +425,7 @@ jobs:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
@ -410,7 +443,7 @@ jobs:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
@ -435,9 +468,9 @@ jobs:
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
@ -474,7 +507,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 2
ref: ${{ inputs.commit_sha || github.sha }}
- name: Install transformers
run: pip install transformers
@ -510,14 +542,14 @@ jobs:
needs: [
setup,
run_models_gpu,
run_trainer_and_fsdp_gpu,
run_pipelines_torch_gpu,
run_pipelines_tf_gpu,
run_examples_gpu,
run_torch_cuda_extensions_gpu,
run_quantization_torch_gpu,
run_extract_warnings
]
if: always() && !cancelled()
if: ${{ always() }}
uses: ./.github/workflows/slack-report.yml
with:
job: ${{ inputs.job }}
@ -528,22 +560,15 @@ jobs:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
ci_event: ${{ inputs.ci_event }}
report_repo_id: ${{ inputs.report_repo_id }}
commit_sha: ${{ inputs.commit_sha || github.sha }}
secrets: inherit
check_new_failures:
if: ${{ always() && inputs.ci_event == 'Daily CI' && needs.send_results.result == 'success' }}
name: Check new failures
check_new_model_failures:
if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job == 'run_models_gpu' && needs.send_results.result == 'success' }}
name: Check new model failures
needs: send_results
uses: ./.github/workflows/check_failed_tests.yml
uses: ./.github/workflows/check_failed_model_tests.yml
with:
docker: ${{ inputs.docker }}
start_sha: ${{ inputs.commit_sha || github.sha }}
job: ${{ inputs.job }}
slack_report_channel: ${{ inputs.slack_report_channel }}
ci_event: ${{ inputs.ci_event }}
report_repo_id: ${{ inputs.report_repo_id }}
start_sha: ${{ github.sha }}
secrets: inherit

View File

@ -21,13 +21,6 @@ on:
ci_event:
required: true
type: string
report_repo_id:
required: true
type: string
commit_sha:
required: false
type: string
env:
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@ -36,7 +29,7 @@ jobs:
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always() && !cancelled()
if: always()
steps:
- name: Preliminary job status
shell: bash
@ -45,28 +38,9 @@ jobs:
echo "Setup status: ${{ inputs.setup_status }}"
- uses: actions/checkout@v4
with:
fetch-depth: 2
ref: ${{ inputs.commit_sha || github.sha }}
- uses: actions/download-artifact@v4
- name: Prepare some setup values
run: |
if [ -f setup_values/prev_workflow_run_id.txt ]; then
echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
else
echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
fi
if [ -f setup_values/other_workflow_run_id.txt ]; then
echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
else
echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
fi
- name: Send message to Slack
shell: bash
if: ${{ inputs.job != 'run_quantization_torch_gpu' }}
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
@ -75,25 +49,20 @@ jobs:
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: ${{ inputs.ci_event }}
# This `CI_TITLE` would be empty for `schedule` or `workflow_run` events.
CI_TITLE: ${{ github.event.head_commit.message }}
CI_SHA: ${{ inputs.commit_sha || github.sha }}
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
CI_TEST_JOB: ${{ inputs.job }}
SETUP_STATUS: ${{ inputs.setup_status }}
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
# For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
# empty string, and the called script still gets one argument (which is the empty string).
run: |
sudo apt-get install -y curl
pip install huggingface_hub
pip install slack_sdk
pip show slack_sdk
if [ "${{ inputs.quantization_matrix }}" != "" ]; then
python utils/notification_service.py "${{ inputs.quantization_matrix }}"
else
python utils/notification_service.py "${{ inputs.folder_slices }}"
fi
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
@ -101,3 +70,32 @@ jobs:
with:
name: ci_results_${{ inputs.job }}
path: ci_results_${{ inputs.job }}
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack for quantization workflow
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
CI_EVENT: ${{ inputs.ci_event }}
CI_SHA: ${{ github.sha }}
CI_TEST_JOB: ${{ inputs.job }}
SETUP_STATUS: ${{ inputs.setup_status }}
# We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
# `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
run: |
sudo apt-get install -y curl
pip install huggingface_hub
pip install slack_sdk
pip show slack_sdk
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
uses: actions/upload-artifact@v4
with:
name: ci_results_${{ inputs.job }}
path: ci_results_${{ inputs.job }}

View File

@ -20,8 +20,10 @@ env:
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
RUN_PT_TF_CROSS_TESTS: 1
jobs:
get_runner:
@ -32,17 +34,14 @@ jobs:
steps:
- name: Get runner to use
shell: bash
env:
NUM_GPUS: ${{ github.event.inputs.num_gpus }}
RUNNER_TYPE: ${{ github.event.inputs.runner_type }}
run: |
if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "t4" ]]; then
echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "t4" ]]; then
if [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
echo "RUNNER=aws-g4dn-2xlarge-cache" >> $GITHUB_ENV
elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
elif [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then
elif [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "a10" ]]; then
elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV
else
echo "RUNNER=" >> $GITHUB_ENV
@ -87,11 +86,9 @@ jobs:
- name: Store Slack infos
# SSH can be enabled dynamically if the workflow fails, so we need to store the Slack info to be able to retrieve it during the waitforssh step
shell: bash
env:
GITHUB_ACTOR: ${{ github.actor }}
run: |
echo "$GITHUB_ACTOR"
github_actor=$GITHUB_ACTOR
echo "${{ github.actor }}"
github_actor=${{ github.actor }}
github_actor=${github_actor/'-'/'_'}
echo "$github_actor"
echo "github_actor=$github_actor" >> $GITHUB_ENV

View File

@ -16,5 +16,3 @@ jobs:
fetch-depth: 0
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main
with:
extra_args: --results=verified,unknown

View File

@ -19,7 +19,7 @@ jobs:
- name: Setup environment
run: |
pip install --upgrade pip
pip install datasets pandas
pip install datasets pandas==2.0.3
pip install .[torch,tf,flax]
- name: Update metadata

8
.gitignore vendored
View File

@ -13,7 +13,6 @@ tests/fixtures/cached_*_text.txt
logs/
lightning_logs/
lang_code_data/
reports/
# Distribution / packaging
.Python
@ -98,7 +97,6 @@ celerybeat-schedule
# Environments
.env
.venv
.venv*
env/
venv/
ENV/
@ -169,9 +167,3 @@ tags
# ruff
.ruff_cache
# modular conversion
*.modular_backup
# Cursor IDE files
.cursor/

View File

@ -1,39 +0,0 @@
# AGENTS.md Guide for Hugging Face Transformers
This AGENTS.md file provides guidance for code agents working with this codebase.
## Core Project Structure
- `/src/transformers`: This contains the core source code for the library
- `/models`: Code for individual models. Models inherit from base classes in the root `/src/transformers` directory.
- `/tests`: This contains the core test classes for the library. These are usually inherited rather than directly run.
- `/models`: Tests for individual models. Model tests inherit from common tests in the root `/tests` directory.
- `/docs`: This contains the documentation for the library, including guides, tutorials, and API references.
## Coding Conventions for Hugging Face Transformers
- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
## Copying and inheritance
Many models in the codebase have similar code, but it is not shared by inheritance because we want each model file to be self-contained.
We use two mechanisms to keep this code in sync:
- "Copied from" syntax. Functions or entire classes can have a comment at the top like this: `# Copied from transformers.models.llama.modeling_llama.rotate_half` or `# Copied from transformers.models.t5.modeling_t5.T5LayerNorm with T5->MT5`
These comments are actively checked by the style tools, and copies will automatically be updated when the base code is updated. If you need to update a copied function, you should
either update the base function and use `make fixup` to propagate the change to all copies, or simply remove the `# Copied from` comment if that is inappropriate.
- "Modular" files. These files briefly define models by composing them using inheritance from other models. They are not meant to be used directly. Instead, the style tools
automatically generate a complete modeling file, like `modeling_bert.py`, from the modular file like `modular_bert.py`. If a model has a modular file, the modeling file
should never be edited directly! Instead, changes should be made in the modular file, and then you should run `make fixup` to update the modeling file automatically.
When adding new models, you should prefer `modular` style.
## Testing
After making changes, you should usually run `make fixup` to ensure any copies and modular files are updated, and then test all affected models. This includes both
the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`
If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.

View File

@ -68,7 +68,8 @@ already reported** (use the search bar on GitHub under Issues). Your issue shoul
Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it:
* Your **OS type and version** and **Python**, and **PyTorch** versions when applicable.
* Your **OS type and version** and **Python**, **PyTorch** and
**TensorFlow** versions when applicable.
* A short, self-contained, code snippet that allows us to reproduce the bug in
less than 30s.
* The *full* traceback if an exception is raised.
@ -77,7 +78,7 @@ Once you've confirmed the bug hasn't already been reported, please include the f
To get the OS and software versions automatically, run the following command:
```bash
transformers env
transformers-cli env
```
You can also run the same command from the root of the repository:
@ -112,125 +113,7 @@ New models are constantly released and if you want to implement a new model, ple
If you are willing to contribute the model yourself, let us know so we can help you add it to 🤗 Transformers!
We have a technical guide for [how to add a model to 🤗 Transformers](https://huggingface.co/docs/transformers/modular_transformers).
### Vision-Language Model Contribution Checklist
If you're contributing a **vision-language model** (or any multimodal model that processes images/videos), please follow this checklist. Maintainers will use this to review your PR, and completing these steps will significantly increase the likelihood of your PR being merged quickly.
**Required checklist for all vision-language model contributions:**
**1. Implement a modular file**
All new models should use the modular architecture pattern. Create a `modular_<model_name>.py` file using the modular model converter:
- Use the CLI, [`transformers add-new-model-like`](https://github.com/huggingface/transformers/blob/main/src/transformers/cli/add_new_model_like.py) to generate a modular skeleton and get started
- All code should be in the modular file if possible. Modeling must be in it, it's better if configuration is in it as well.
- Reuse existing patterns from similar models as much as possible
To verify your modular file is correct, run:
```bash
python utils/modular_model_converter.py <model_name>
```
This will generate the separate files (`modeling_*.py`, `configuration_*.py`, etc.) from your modular file. The CI will enforce that these generated files match your modular file.
**2. Add a fast image processor (for image models)**
If your model processes images, implement a fast image processor that uses `torch` and `torchvision` instead of PIL/numpy for better inference performance:
- See the detailed guide in [#36978](https://github.com/huggingface/transformers/issues/36978)
- Fast processors inherit from `BaseImageProcessorFast`
- Examples: `LlavaOnevisionImageProcessorFast`, `Idefics2ImageProcessorFast`
**3. Create a weight conversion script**
Add a `convert_<model_name>_to_hf.py` script that converts the original model weights to the HuggingFace format:
- Script should handle checkpoint loading, key mapping, and saving in HF format
- Include usage examples and documentation in the script
- Examples: [`convert_llava_onevision_weights_to_hf.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py), [`convert_idefics2_weights_to_hf.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/idefics2/convert_idefics2_weights_to_hf.py)
**4. Add integration tests with exact output matching**
At minimum, add an `IntegrationTest` class that tests end-to-end generation (processing and modelling) with **exact** output matching:
- For generative models: test that generated text matches expected output exactly
- For non-generative models: test that output logits match expected values
- Tests should use real checkpoints (load in 4-bit or half precision if the checkpoint is too big to fit in our CI runners) and real inputs
- Example pattern:
```python
class MyModelIntegrationTest(unittest.TestCase):
@slow
def test_model_integration(self):
model = MyModelForConditionalGeneration.from_pretrained("org/model-name")
processor = AutoProcessor.from_pretrained("org/model-name")
inputs = processor(images=image, text=prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=20)
EXPECTED_TEXT = "exact expected output"
self.assertEqual(processor.decode(output[0]), EXPECTED_TEXT)
```
See `tests/models/llava_onevision/test_modeling_llava_onevision.py` for complete examples.
**5. Update documentation**
Add or update model documentation:
- Create `docs/source/en/model_doc/<model_name>.md` with usage examples, if the CLI hasn't already generated it
- Include model description, paper link, and basic usage with `Pipeline` and `AutoModel`
- Add the model to the appropriate TOC files
**6. Look for reusable patterns**
The library has 400+ models with many established patterns:
- Search for similar models (e.g., other vision-language models)
- Reuse attention mechanisms, layer implementations, and processing patterns
- Check models like LLaVA, Idefics2, Fuyu for vision-language patterns
- Use provided decorators like (`auto_docstring`, `can_return_tuple`, `check_model_inputs` and `_can_record_outputs`) where relevant.
- Don't reinvent the wheel
**7. Run quality checks and read the output**
Before submitting your PR, install quality dependencies and run the full check suite:
```bash
pip install -e ".[quality]"
make fixup
```
**Important**: Take time to read the output of `make fixup`. It will:
- Lint and format your code automatically
- Run consistency checks (imports, docstrings, etc.)
- Show any remaining issues that need manual fixes
All checks must pass before your PR can be merged.
**If this checklist is complete, your PR has a very high likelihood of being merged!** Following these steps makes the maintainers' work much easier and will reduce the number of review iterations, getting your important work out there faster.
#### Copy-pastable checklist for maintainers
Here's a condensed version maintainers can copy into PRs:
```markdown
## Multimodal Model Addition Checklist
Please ensure your PR completes all following items. See the [full checklist](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#vision-language-model-contribution-checklist) for details.
- [ ] **Modular file**: `modular_<model_name>.py` implemented and verified with `python utils/modular_model_converter.py <model_name>`
- [ ] **Fast image processor**: Implemented using `BaseImageProcessorFast` (see [#36978](https://github.com/huggingface/transformers/issues/36978))
- [ ] **Conversion script**: `convert_<model_name>_to_hf.py` added with usage examples
- [ ] **Integration tests**: End-to-end tests with exact output matching (text or logits)
- [ ] **Documentation**: Model docs added/updated in `docs/source/en/model_doc/`
- [ ] **Pattern reuse**: Verified against similar models (LLaVA, Idefics2, etc.)
- [ ] **Quality checks**: `make fixup` passes with no errors
```
We have a technical guide for [how to add a model to 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model).
## Do you want to add documentation?
@ -282,7 +165,8 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main
mode with the `-e` flag.
Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
failure with this command. If that's the case make sure to install Pytorch then do:
failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
(PyTorch, TensorFlow and/or Flax) then do:
```bash
pip install -e ".[quality]"
@ -337,10 +221,10 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main
[Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide.
If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check
make sure you install the [documentation builder](https://github.com/huggingface/doc-builder).
make sure you install the documentation builder:
```bash
pip install hf-doc-builder
pip install ".[docs]"
```
Run the following command from the root of the repository:
@ -396,14 +280,13 @@ are working on it).<br>
useful to avoid duplicated work, and to differentiate it from PRs ready to be merged.<br>
☐ Make sure existing tests pass.<br>
☐ If adding a new feature, also add tests for it.<br>
- If you are adding a new model, make sure you use
- If you are adding a new model, make sure you use
`ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)` to trigger the common tests.
- If you are adding new `@slow` tests, make sure they pass using
- If you are adding new `@slow` tests, make sure they pass using
`RUN_SLOW=1 python -m pytest tests/models/my_new_model/test_my_new_model.py`.
- If you are adding a new tokenizer, write tests and make sure
- If you are adding a new tokenizer, write tests and make sure
`RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py` passes.
- CircleCI does not run the slow tests, but GitHub Actions does every night!<br>
- CircleCI does not run the slow tests, but GitHub Actions does every night!<br>
☐ All public methods must have informative docstrings (see
[`modeling_bert.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py)
@ -459,8 +342,9 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
```
Like the slow tests, there are other environment variables available which are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration.
- `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration.
More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).

View File

@ -26,7 +26,7 @@ There are two main venues to receive support: [the forums](https://discuss.huggi
[The user forums](https://discuss.huggingface.co/) are supported by the wide community of the library users and backed up by developers when needed.
If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystallized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).
If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystallized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).
In particular all "Please explain" questions or objectively very user-specific feature requests belong to the forums. Here are some examples of such questions:
@ -38,6 +38,7 @@ In particular all "Please explain" questions or objectively very user-specific f
* "How to train T5 on De->En translation?"
## The GitHub Issues
Everything which hints at a bug should be opened as an [issue](https://github.com/huggingface/transformers/issues).
@ -153,7 +154,7 @@ You are not required to read the following guidelines before opening an issue. H
cd examples/seq2seq
torchrun --nproc_per_node=2 ./finetune_trainer.py \
--model_name_or_path sshleifer/distill-mbart-en-ro-12-4 --data_dir wmt_en_ro \
--output_dir output_dir \
--output_dir output_dir --overwrite_output_dir \
--do_train --n_train 500 --num_train_epochs 1 \
--per_device_train_batch_size 1 --freeze_embeds \
--src_lang en_XX --tgt_lang ro_RO --task translation \
@ -246,6 +247,7 @@ You are not required to read the following guidelines before opening an issue. H
Try not to use italics and bold text too much as these often make the text more difficult to read.
12. If you are cross-referencing a specific comment in a given thread or another issue, always link to that specific comment, rather than using the issue link. If you do the latter it could be quite impossible to find which specific comment you're referring to.
To get the link to the specific comment do not copy the url from the location bar of your browser, but instead, click the `...` icon in the upper right corner of the comment and then select "Copy Link".
@ -255,14 +257,15 @@ You are not required to read the following guidelines before opening an issue. H
1. https://github.com/huggingface/transformers/issues/9257
2. https://github.com/huggingface/transformers/issues/9257#issuecomment-749945162
13. If you are replying to a last comment, it's totally fine to make your reply with just your comment in it. The readers can follow the information flow here.
But if you're replying to a comment that happened some comments back it's always a good practice to quote just the relevant lines you're replying to. The `>` is used for quoting, or you can always use the menu to do so. For example your editor box will look like:
```
> How big is your GPU cluster?
> How big is your gpu cluster?
Our cluster is made of 256 GPUs.
Our cluster is made of 256 gpus.
```
If you are addressing multiple comments, quote the relevant parts of each before your answer. Some people use the same comment to do multiple replies, others separate them into separate comments. Either way works. The latter approach helps for linking to a specific comment.

View File

@ -3,24 +3,18 @@
# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
export PYTHONPATH = src
check_dirs := examples tests src utils scripts benchmark benchmark_v2
check_dirs := examples tests src utils
exclude_folders := ""
modified_only_fixup:
@current_branch=$$(git branch --show-current); \
if [ "$$current_branch" = "main" ]; then \
echo "On main branch, running 'style' target instead..."; \
$(MAKE) style; \
else \
modified_py_files=$$(python utils/get_modified_files.py $(check_dirs)); \
if [ -n "$$modified_py_files" ]; then \
echo "Checking/fixing files: $${modified_py_files}"; \
ruff check $${modified_py_files} --fix --exclude $(exclude_folders); \
ruff format $${modified_py_files} --exclude $(exclude_folders); \
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
@if test -n "$(modified_py_files)"; then \
echo "Checking/fixing $(modified_py_files)"; \
ruff check $(modified_py_files) --fix --exclude $(exclude_folders); \
ruff format $(modified_py_files) --exclude $(exclude_folders);\
else \
echo "No library .py files were modified"; \
fi; \
fi
# Update src/transformers/dependency_versions_table.py
@ -43,16 +37,16 @@ autogenerate_code: deps_table_update
repo-consistency:
python utils/check_copies.py
python utils/check_modular_conversion.py
python utils/check_table.py
python utils/check_dummies.py
python utils/check_repo.py
python utils/check_inits.py
python utils/check_pipeline_typing.py
python utils/check_config_docstrings.py
python utils/check_config_attributes.py
python utils/check_doctest_list.py
python utils/update_metadata.py --check-only
python utils/check_docstrings.py
python utils/add_dates.py
python utils/check_support_list.py
# this target runs checks on all files
@ -88,8 +82,8 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
fix-copies:
python utils/check_copies.py --fix_and_overwrite
python utils/check_modular_conversion.py --fix_and_overwrite
python utils/check_table.py --fix_and_overwrite
python utils/check_dummies.py --fix_and_overwrite
python utils/check_pipeline_typing.py --fix_and_overwrite
python utils/check_doctest_list.py --fix_and_overwrite
python utils/check_docstrings.py --fix_and_overwrite

382
README.md
View File

@ -25,7 +25,6 @@ limitations under the License.
</p>
<p align="center">
<a href="https://huggingface.com/models"><img alt="Checkpoints on Hub" src="https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen"></a>
<a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
<a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
<a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
@ -44,279 +43,266 @@ limitations under the License.
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_it.md">Italiano</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_bn.md">বাংলা</a> |
</p>
</h4>
<h3 align="center">
<p>State-of-the-art pretrained models for inference and training</p>
<p>State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow</p>
</h3>
<h3 align="center">
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/transformers_as_a_model_definition.png"/>
<a href="https://hf.co/course"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/course_banner.png"></a>
</h3>
Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer
vision, audio, video, and multimodal models, for both inference and training.
🤗 Transformers provides thousands of pretrained models to perform tasks on different modalities such as text, vision, and audio.
It centralizes the model definition so that this definition is agreed upon across the ecosystem. `transformers` is the
pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training
frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...),
and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from `transformers`.
These models can be applied on:
We pledge to help support new state-of-the-art models and democratize their usage by having their model definition be
simple, customizable, and efficient.
* 📝 Text, for tasks like text classification, information extraction, question answering, summarization, translation, and text generation, in over 100 languages.
* 🖼️ Images, for tasks like image classification, object detection, and segmentation.
* 🗣️ Audio, for tasks like speech recognition and audio classification.
There are 1M+ Transformers [model checkpoints](https://huggingface.co/models?library=transformers&sort=trending) on the [Hugging Face Hub](https://huggingface.com/models) you can use.
Transformer models can also perform tasks on **several modalities combined**, such as table question answering, optical character recognition, information extraction from scanned documents, video classification, and visual question answering.
Explore the [Hub](https://huggingface.com/) today to find a model and use Transformers to help you get started right away.
🤗 Transformers provides APIs to quickly download and use those pretrained models on a given text, fine-tune them on your own datasets and then share them with the community on our [model hub](https://huggingface.co/models). At the same time, each python module defining an architecture is fully standalone and can be modified to enable quick research experiments.
## Installation
🤗 Transformers is backed by the three most popular deep learning libraries — [Jax](https://jax.readthedocs.io/en/latest/), [PyTorch](https://pytorch.org/) and [TensorFlow](https://www.tensorflow.org/) — with a seamless integration between them. It's straightforward to train your models with one before loading them for inference with the other.
Transformers works with Python 3.9+, and [PyTorch](https://pytorch.org/get-started/locally/) 2.1+.
## Online demos
Create and activate a virtual environment with [venv](https://docs.python.org/3/library/venv.html) or [uv](https://docs.astral.sh/uv/), a fast Rust-based Python package and project manager.
You can test most of our models directly on their pages from the [model hub](https://huggingface.co/models). We also offer [private model hosting, versioning, & an inference API](https://huggingface.co/pricing) for public and private models.
```py
# venv
python -m venv .my-env
source .my-env/bin/activate
# uv
uv venv .my-env
source .my-env/bin/activate
Here are a few examples:
In Natural Language Processing:
- [Masked word completion with BERT](https://huggingface.co/google-bert/bert-base-uncased?text=Paris+is+the+%5BMASK%5D+of+France)
- [Named Entity Recognition with Electra](https://huggingface.co/dbmdz/electra-large-discriminator-finetuned-conll03-english?text=My+name+is+Sarah+and+I+live+in+London+city)
- [Text generation with Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)
- [Natural Language Inference with RoBERTa](https://huggingface.co/FacebookAI/roberta-large-mnli?text=The+dog+was+lost.+Nobody+lost+any+animal)
- [Summarization with BART](https://huggingface.co/facebook/bart-large-cnn?text=The+tower+is+324+metres+%281%2C063+ft%29+tall%2C+about+the+same+height+as+an+81-storey+building%2C+and+the+tallest+structure+in+Paris.+Its+base+is+square%2C+measuring+125+metres+%28410+ft%29+on+each+side.+During+its+construction%2C+the+Eiffel+Tower+surpassed+the+Washington+Monument+to+become+the+tallest+man-made+structure+in+the+world%2C+a+title+it+held+for+41+years+until+the+Chrysler+Building+in+New+York+City+was+finished+in+1930.+It+was+the+first+structure+to+reach+a+height+of+300+metres.+Due+to+the+addition+of+a+broadcasting+aerial+at+the+top+of+the+tower+in+1957%2C+it+is+now+taller+than+the+Chrysler+Building+by+5.2+metres+%2817+ft%29.+Excluding+transmitters%2C+the+Eiffel+Tower+is+the+second+tallest+free-standing+structure+in+France+after+the+Millau+Viaduct)
- [Question answering with DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased-distilled-squad?text=Which+name+is+also+used+to+describe+the+Amazon+rainforest+in+English%3F&context=The+Amazon+rainforest+%28Portuguese%3A+Floresta+Amaz%C3%B4nica+or+Amaz%C3%B4nia%3B+Spanish%3A+Selva+Amaz%C3%B3nica%2C+Amazon%C3%ADa+or+usually+Amazonia%3B+French%3A+For%C3%AAt+amazonienne%3B+Dutch%3A+Amazoneregenwoud%29%2C+also+known+in+English+as+Amazonia+or+the+Amazon+Jungle%2C+is+a+moist+broadleaf+forest+that+covers+most+of+the+Amazon+basin+of+South+America.+This+basin+encompasses+7%2C000%2C000+square+kilometres+%282%2C700%2C000+sq+mi%29%2C+of+which+5%2C500%2C000+square+kilometres+%282%2C100%2C000+sq+mi%29+are+covered+by+the+rainforest.+This+region+includes+territory+belonging+to+nine+nations.+The+majority+of+the+forest+is+contained+within+Brazil%2C+with+60%25+of+the+rainforest%2C+followed+by+Peru+with+13%25%2C+Colombia+with+10%25%2C+and+with+minor+amounts+in+Venezuela%2C+Ecuador%2C+Bolivia%2C+Guyana%2C+Suriname+and+French+Guiana.+States+or+departments+in+four+nations+contain+%22Amazonas%22+in+their+names.+The+Amazon+represents+over+half+of+the+planet%27s+remaining+rainforests%2C+and+comprises+the+largest+and+most+biodiverse+tract+of+tropical+rainforest+in+the+world%2C+with+an+estimated+390+billion+individual+trees+divided+into+16%2C000+species)
- [Translation with T5](https://huggingface.co/google-t5/t5-base?text=My+name+is+Wolfgang+and+I+live+in+Berlin)
In Computer Vision:
- [Image classification with ViT](https://huggingface.co/google/vit-base-patch16-224)
- [Object Detection with DETR](https://huggingface.co/facebook/detr-resnet-50)
- [Semantic Segmentation with SegFormer](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512)
- [Panoptic Segmentation with Mask2Former](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic)
- [Depth Estimation with Depth Anything](https://huggingface.co/docs/transformers/main/model_doc/depth_anything)
- [Video Classification with VideoMAE](https://huggingface.co/docs/transformers/model_doc/videomae)
- [Universal Segmentation with OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_dinat_large)
In Audio:
- [Automatic Speech Recognition with Whisper](https://huggingface.co/openai/whisper-large-v3)
- [Keyword Spotting with Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks)
- [Audio Classification with Audio Spectrogram Transformer](https://huggingface.co/MIT/ast-finetuned-audioset-10-10-0.4593)
In Multimodal tasks:
- [Table Question Answering with TAPAS](https://huggingface.co/google/tapas-base-finetuned-wtq)
- [Visual Question Answering with ViLT](https://huggingface.co/dandelin/vilt-b32-finetuned-vqa)
- [Image captioning with LLaVa](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
- [Zero-shot Image Classification with SigLIP](https://huggingface.co/google/siglip-so400m-patch14-384)
- [Document Question Answering with LayoutLM](https://huggingface.co/impira/layoutlm-document-qa)
- [Zero-shot Video Classification with X-CLIP](https://huggingface.co/docs/transformers/model_doc/xclip)
- [Zero-shot Object Detection with OWLv2](https://huggingface.co/docs/transformers/en/model_doc/owlv2)
- [Zero-shot Image Segmentation with CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg)
- [Automatic Mask Generation with SAM](https://huggingface.co/docs/transformers/model_doc/sam)
## 100 projects using Transformers
Transformers is more than a toolkit to use pretrained models: it's a community of projects built around it and the
Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone
else to build their dream projects.
In order to celebrate the 100,000 stars of transformers, we have decided to put the spotlight on the
community, and we have created the [awesome-transformers](./awesome-transformers.md) page which lists 100
incredible projects built in the vicinity of transformers.
If you own or use a project that you believe should be part of the list, please open a PR to add it!
## Serious about AI in your organisation? Build faster with the Hugging Face Enterprise Hub.
<a target="_blank" href="https://huggingface.co/enterprise">
<img alt="Hugging Face Enterprise Hub" src="https://github.com/user-attachments/assets/247fb16d-d251-4583-96c4-d3d76dda4925">
</a><br>
## Quick tour
To immediately use a model on a given input (text, image, audio, ...), we provide the `pipeline` API. Pipelines group together a pretrained model with the preprocessing that was used during that model's training. Here is how to quickly use a pipeline to classify positive versus negative texts:
```python
>>> from transformers import pipeline
# Allocate a pipeline for sentiment-analysis
>>> classifier = pipeline('sentiment-analysis')
>>> classifier('We are very happy to introduce pipeline to the transformers repository.')
[{'label': 'POSITIVE', 'score': 0.9996980428695679}]
```
Install Transformers in your virtual environment.
The second line of code downloads and caches the pretrained model used by the pipeline, while the third evaluates it on the given text. Here, the answer is "positive" with a confidence of 99.97%.
```py
# pip
pip install "transformers[torch]"
Many tasks have a pre-trained `pipeline` ready to go, in NLP but also in computer vision and speech. For example, we can easily extract detected objects in an image:
# uv
uv pip install "transformers[torch]"
``` python
>>> import requests
>>> from PIL import Image
>>> from transformers import pipeline
# Download an image with cute cats
>>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png"
>>> image_data = requests.get(url, stream=True).raw
>>> image = Image.open(image_data)
# Allocate a pipeline for object detection
>>> object_detector = pipeline('object-detection')
>>> object_detector(image)
[{'score': 0.9982201457023621,
'label': 'remote',
'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}},
{'score': 0.9960021376609802,
'label': 'remote',
'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}},
{'score': 0.9954745173454285,
'label': 'couch',
'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}},
{'score': 0.9988006353378296,
'label': 'cat',
'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}},
{'score': 0.9986783862113953,
'label': 'cat',
'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}]
```
Install Transformers from source if you want the latest changes in the library or are interested in contributing. However, the *latest* version may not be stable. Feel free to open an [issue](https://github.com/huggingface/transformers/issues) if you encounter an error.
```shell
git clone https://github.com/huggingface/transformers.git
cd transformers
# pip
pip install '.[torch]'
# uv
uv pip install '.[torch]'
```
## Quickstart
Get started with Transformers right away with the [Pipeline](https://huggingface.co/docs/transformers/pipeline_tutorial) API. The `Pipeline` is a high-level inference class that supports text, audio, vision, and multimodal tasks. It handles preprocessing the input and returns the appropriate output.
Instantiate a pipeline and specify the model to use for text generation. The model is downloaded and cached so you can easily reuse it again. Finally, pass some text to prompt the model.
```py
from transformers import pipeline
pipeline = pipeline(task="text-generation", model="Qwen/Qwen2.5-1.5B")
pipeline("the secret to baking a really good cake is ")
[{'generated_text': 'the secret to baking a really good cake is 1) to use the right ingredients and 2) to follow the recipe exactly. the recipe for the cake is as follows: 1 cup of sugar, 1 cup of flour, 1 cup of milk, 1 cup of butter, 1 cup of eggs, 1 cup of chocolate chips. if you want to make 2 cakes, how much sugar do you need? To make 2 cakes, you will need 2 cups of sugar.'}]
```
To chat with a model, the usage pattern is the same. The only difference is you need to construct a chat history (the input to `Pipeline`) between you and the system.
> [!TIP]
> You can also chat with a model directly from the command line.
> ```shell
> transformers chat Qwen/Qwen2.5-0.5B-Instruct
> ```
```py
import torch
from transformers import pipeline
chat = [
{"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
{"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
]
pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", dtype=torch.bfloat16, device_map="auto")
response = pipeline(chat, max_new_tokens=512)
print(response[0]["generated_text"][-1]["content"])
```
Expand the examples below to see how `Pipeline` works for different modalities and tasks.
<details>
<summary>Automatic speech recognition</summary>
```py
from transformers import pipeline
pipeline = pipeline(task="automatic-speech-recognition", model="openai/whisper-large-v3")
pipeline("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}
```
</details>
<details>
<summary>Image classification</summary>
Here, we get a list of objects detected in the image, with a box surrounding the object and a confidence score. Here is the original image on the left, with the predictions displayed on the right:
<h3 align="center">
<a><img src="https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png"></a>
<a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png" width="400"></a>
<a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample_post_processed.png" width="400"></a>
</h3>
```py
from transformers import pipeline
You can learn more about the tasks supported by the `pipeline` API in [this tutorial](https://huggingface.co/docs/transformers/task_summary).
pipeline = pipeline(task="image-classification", model="facebook/dinov2-small-imagenet1k-1-layer")
pipeline("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
[{'label': 'macaw', 'score': 0.997848391532898},
{'label': 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
'score': 0.0016551691805943847},
{'label': 'lorikeet', 'score': 0.00018523589824326336},
{'label': 'African grey, African gray, Psittacus erithacus',
'score': 7.85409429227002e-05},
{'label': 'quail', 'score': 5.502637941390276e-05}]
In addition to `pipeline`, to download and use any of the pretrained models on your given task, all it takes is three lines of code. Here is the PyTorch version:
```python
>>> from transformers import AutoTokenizer, AutoModel
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
>>> model = AutoModel.from_pretrained("google-bert/bert-base-uncased")
>>> inputs = tokenizer("Hello world!", return_tensors="pt")
>>> outputs = model(**inputs)
```
</details>
And here is the equivalent code for TensorFlow:
```python
>>> from transformers import AutoTokenizer, TFAutoModel
<details>
<summary>Visual question answering</summary>
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
>>> model = TFAutoModel.from_pretrained("google-bert/bert-base-uncased")
<h3 align="center">
<a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg"></a>
</h3>
```py
from transformers import pipeline
pipeline = pipeline(task="visual-question-answering", model="Salesforce/blip-vqa-base")
pipeline(
image="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg",
question="What is in the image?",
)
[{'answer': 'statue of liberty'}]
>>> inputs = tokenizer("Hello world!", return_tensors="tf")
>>> outputs = model(**inputs)
```
</details>
The tokenizer is responsible for all the preprocessing the pretrained model expects and can be called directly on a single string (as in the above examples) or a list. It will output a dictionary that you can use in downstream code or simply directly pass to your model using the `**` argument unpacking operator.
## Why should I use Transformers?
The model itself is a regular [Pytorch `nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) or a [TensorFlow `tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) (depending on your backend) which you can use as usual. [This tutorial](https://huggingface.co/docs/transformers/training) explains how to integrate such a model into a classic PyTorch or TensorFlow training loop, or how to use our `Trainer` API to quickly fine-tune on a new dataset.
## Why should I use transformers?
1. Easy-to-use state-of-the-art models:
- High performance on natural language understanding & generation, computer vision, audio, video, and multimodal tasks.
- Low barrier to entry for researchers, engineers, and developers.
- High performance on natural language understanding & generation, computer vision, and audio tasks.
- Low barrier to entry for educators and practitioners.
- Few user-facing abstractions with just three classes to learn.
- A unified API for using all our pretrained models.
1. Lower compute costs, smaller carbon footprint:
- Share trained models instead of training from scratch.
- Reduce compute time and production costs.
- Dozens of model architectures with 1M+ pretrained checkpoints across all modalities.
- Researchers can share trained models instead of always retraining.
- Practitioners can reduce compute time and production costs.
- Dozens of architectures with over 400,000 pretrained models across all modalities.
1. Choose the right framework for every part of a model's lifetime:
1. Choose the right framework for every part of a model's lifetime:
- Train state-of-the-art models in 3 lines of code.
- Move a single model between PyTorch/JAX/TF2.0 frameworks at will.
- Pick the right framework for training, evaluation, and production.
- Move a single model between TF2.0/PyTorch/JAX frameworks at will.
- Seamlessly pick the right framework for training, evaluation, and production.
1. Easily customize a model or an example to your needs:
- We provide examples for each architecture to reproduce the results published by its original authors.
- Model internals are exposed as consistently as possible.
- Model files can be used independently of the library for quick experiments.
<a target="_blank" href="https://huggingface.co/enterprise">
<img alt="Hugging Face Enterprise Hub" src="https://github.com/user-attachments/assets/247fb16d-d251-4583-96c4-d3d76dda4925">
</a><br>
## Why shouldn't I use Transformers?
## Why shouldn't I use transformers?
- This library is not a modular toolbox of building blocks for neural nets. The code in the model files is not refactored with additional abstractions on purpose, so that researchers can quickly iterate on each of the models without diving into additional abstractions/files.
- The training API is optimized to work with PyTorch models provided by Transformers. For generic machine learning loops, you should use another library like [Accelerate](https://huggingface.co/docs/accelerate).
- The [example scripts](https://github.com/huggingface/transformers/tree/main/examples) are only *examples*. They may not necessarily work out-of-the-box on your specific use case and you'll need to adapt the code for it to work.
- The training API is not intended to work on any model but is optimized to work with the models provided by the library. For generic machine learning loops, you should use another library (possibly, [Accelerate](https://huggingface.co/docs/accelerate)).
- While we strive to present as many use cases as possible, the scripts in our [examples folder](https://github.com/huggingface/transformers/tree/main/examples) are just that: examples. It is expected that they won't work out-of-the-box on your specific problem and that you will be required to change a few lines of code to adapt them to your needs.
## 100 projects using Transformers
## Installation
Transformers is more than a toolkit to use pretrained models, it's a community of projects built around it and the
Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone
else to build their dream projects.
### With pip
In order to celebrate Transformers 100,000 stars, we wanted to put the spotlight on the
community with the [awesome-transformers](./awesome-transformers.md) page which lists 100
incredible projects built with Transformers.
This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 1.11+, and TensorFlow 2.6+.
If you own or use a project that you believe should be part of the list, please open a PR to add it!
You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
## Example models
First, create a virtual environment with the version of Python you're going to use and activate it.
You can test most of our models directly on their [Hub model pages](https://huggingface.co/models).
Then, you will need to install at least one of Flax, PyTorch, or TensorFlow.
Please refer to [TensorFlow installation page](https://www.tensorflow.org/install/), [PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation) installation pages regarding the specific installation command for your platform.
Expand each modality below to see a few example models for various use cases.
When one of those backends has been installed, 🤗 Transformers can be installed using pip as follows:
<details>
<summary>Audio</summary>
```bash
pip install transformers
```
- Audio classification with [Whisper](https://huggingface.co/openai/whisper-large-v3-turbo)
- Automatic speech recognition with [Moonshine](https://huggingface.co/UsefulSensors/moonshine)
- Keyword spotting with [Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks)
- Speech to speech generation with [Moshi](https://huggingface.co/kyutai/moshiko-pytorch-bf16)
- Text to audio with [MusicGen](https://huggingface.co/facebook/musicgen-large)
- Text to speech with [Bark](https://huggingface.co/suno/bark)
If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/installation#installing-from-source).
</details>
### With conda
<details>
<summary>Computer vision</summary>
🤗 Transformers can be installed using conda as follows:
- Automatic mask generation with [SAM](https://huggingface.co/facebook/sam-vit-base)
- Depth estimation with [DepthPro](https://huggingface.co/apple/DepthPro-hf)
- Image classification with [DINO v2](https://huggingface.co/facebook/dinov2-base)
- Keypoint detection with [SuperPoint](https://huggingface.co/magic-leap-community/superpoint)
- Keypoint matching with [SuperGlue](https://huggingface.co/magic-leap-community/superglue_outdoor)
- Object detection with [RT-DETRv2](https://huggingface.co/PekingU/rtdetr_v2_r50vd)
- Pose Estimation with [VitPose](https://huggingface.co/usyd-community/vitpose-base-simple)
- Universal segmentation with [OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_swin_large)
- Video classification with [VideoMAE](https://huggingface.co/MCG-NJU/videomae-large)
```shell script
conda install conda-forge::transformers
```
</details>
> **_NOTE:_** Installing `transformers` from the `huggingface` channel is deprecated.
<details>
<summary>Multimodal</summary>
Follow the installation pages of Flax, PyTorch or TensorFlow to see how to install them with conda.
- Audio or text to text with [Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B)
- Document question answering with [LayoutLMv3](https://huggingface.co/microsoft/layoutlmv3-base)
- Image or text to text with [Qwen-VL](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
- Image captioning with [BLIP-2](https://huggingface.co/Salesforce/blip2-opt-2.7b)
- OCR-based document understanding with [GOT-OCR2](https://huggingface.co/stepfun-ai/GOT-OCR-2.0-hf)
- Table question answering with [TAPAS](https://huggingface.co/google/tapas-base)
- Unified multimodal understanding and generation with [Emu3](https://huggingface.co/BAAI/Emu3-Gen)
- Vision to text with [Llava-OneVision](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)
- Visual question answering with [Llava](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
- Visual referring expression segmentation with [Kosmos-2](https://huggingface.co/microsoft/kosmos-2-patch14-224)
> **_NOTE:_** On Windows, you may be prompted to activate Developer Mode in order to benefit from caching. If this is not an option for you, please let us know in [this issue](https://github.com/huggingface/huggingface_hub/issues/1062).
</details>
## Model architectures
<details>
<summary>NLP</summary>
**[All the model checkpoints](https://huggingface.co/models)** provided by 🤗 Transformers are seamlessly integrated from the huggingface.co [model hub](https://huggingface.co/models), where they are uploaded directly by [users](https://huggingface.co/users) and [organizations](https://huggingface.co/organizations).
- Masked word completion with [ModernBERT](https://huggingface.co/answerdotai/ModernBERT-base)
- Named entity recognition with [Gemma](https://huggingface.co/google/gemma-2-2b)
- Question answering with [Mixtral](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)
- Summarization with [BART](https://huggingface.co/facebook/bart-large-cnn)
- Translation with [T5](https://huggingface.co/google-t5/t5-base)
- Text generation with [Llama](https://huggingface.co/meta-llama/Llama-3.2-1B)
- Text classification with [Qwen](https://huggingface.co/Qwen/Qwen2.5-0.5B)
Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen)
</details>
🤗 Transformers currently provides the following architectures: see [here](https://huggingface.co/docs/transformers/model_summary) for a high-level summary of each of them.
To check if each model has an implementation in Flax, PyTorch or TensorFlow, or has an associated tokenizer backed by the 🤗 Tokenizers library, refer to [this table](https://huggingface.co/docs/transformers/index#supported-frameworks).
These implementations have been tested on several datasets (see the example scripts) and should match the performance of the original implementations. You can find more details on performance in the Examples section of the [documentation](https://github.com/huggingface/transformers/tree/main/examples).
## Learn more
| Section | Description |
|-|-|
| [Documentation](https://huggingface.co/docs/transformers/) | Full API documentation and tutorials |
| [Task summary](https://huggingface.co/docs/transformers/task_summary) | Tasks supported by 🤗 Transformers |
| [Preprocessing tutorial](https://huggingface.co/docs/transformers/preprocessing) | Using the `Tokenizer` class to prepare data for the models |
| [Training and fine-tuning](https://huggingface.co/docs/transformers/training) | Using the models provided by 🤗 Transformers in a PyTorch/TensorFlow training loop and the `Trainer` API |
| [Quick tour: Fine-tuning/usage scripts](https://github.com/huggingface/transformers/tree/main/examples) | Example scripts for fine-tuning models on a wide range of tasks |
| [Model sharing and uploading](https://huggingface.co/docs/transformers/model_sharing) | Upload and share your fine-tuned models with the community |
## Citation

View File

@ -14,7 +14,7 @@ Models uploaded on the Hugging Face Hub come in different formats. We heavily re
models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized
by the transformers library), as developed specifically to prevent arbitrary code execution on your system.
To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
### Remote code
@ -27,6 +27,13 @@ These models require the `trust_remote_code=True` parameter to be set when using
the content of the modeling files when using this argument. We recommend setting a revision in order to ensure you
protect yourself from updates on the repository.
#### Tools
Through the `Agent` framework, remote tools can be downloaded to be used by the Agent. You're to specify these tools
yourself, but please keep in mind that their code will be run on your machine if the Agent chooses to run them.
Please inspect the code of the tools before passing them to the Agent to protect your runtime and local setup.
## Reporting a Vulnerability
Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software.

View File

@ -15,7 +15,7 @@ to add it.
Keywords: Open-source, LLaMa, GPT-J, instruction, assistant
## [recommenders](https://github.com/recommenders-team/recommenders)
## [recommenders](https://github.com/microsoft/recommenders)
This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. It goes over several aspects required to build efficient recommendation systems: data preparation, modeling, evaluation, model selection & optimization, as well as operationalization
@ -29,7 +29,7 @@ Keywords: inpainting, SD, Stable Diffusion
## [flair](https://github.com/flairNLP/flair)
FLAIR is a powerful PyTorch NLP framework, covering several important tasks: NER, sentiment-analysis, part-of-speech tagging, text and document embeddings, among other things.
FLAIR is a powerful PyTorch NLP framework, covering several important tasks: NER, sentiment-analysis, part-of-speech tagging, text and document embeddings, among other things.
Keywords: NLP, text embedding, document embedding, biomedical, NER, PoS, sentiment-analysis
@ -39,15 +39,15 @@ MindsDB is a low-code ML platform, which automates and integrates several ML fra
Keywords: Database, low-code, AI table
## [langchain](https://github.com/langchain-ai/langchain)
## [langchain](https://github.com/hwchase17/langchain)
[langchain](https://github.com/langchain-ai/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.
[langchain](https://github.com/hwchase17/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.
Keywords: LLMs, Large Language Models, Agents, Chains
## [LlamaIndex](https://github.com/run-llama/llama_index)
## [LlamaIndex](https://github.com/jerryjliu/llama_index)
[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
[LlamaIndex](https://github.com/jerryjliu/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation
@ -146,9 +146,9 @@ Keywords: Framework, simplicity, NLP
Keywords: LLM, Agents, HF Hub
## [transformers.js](https://github.com/huggingface/transformers.js/)
## [transformers.js](https://xenova.github.io/transformers.js/)
[transformers.js](https://github.com/huggingface/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.
[transformers.js](https://xenova.github.io/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.
Keywords: Transformers, JavaScript, browser
@ -288,7 +288,7 @@ Keywords: Music understanding, Music generation
## [dalle-flow](https://github.com/jina-ai/dalle-flow)
DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt.
DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt.
The preferred candidate is fed to GLID-3 XL for diffusion, which often enriches the texture and background. Finally, the candidate is upscaled to 1024x1024 via SwinIR.
Keywords: High-definition image generation, Stable Diffusion, DALL-E Mega, GLID-3 XL, CLIP, SwinIR
@ -437,7 +437,7 @@ Keywords: DALL-E, Russian
Keywords: Knowledge Extraction, Knowledge Graphs
## [Nebuly](https://github.com/nebuly-ai/optimate)
## [Nebuly](https://github.com/nebuly-ai/nebuly)
Nebuly is the next-generation platform to monitor and optimize your AI costs in one place. The platform connects to all your AI cost sources (compute, API providers, AI software licenses, etc) and centralizes them in one place to give you full visibility on a model basis. The platform also provides optimization recommendations and a co-pilot model that can guide during the optimization process. The platform builds on top of the open-source tools allowing you to optimize the different steps of your AI stack to squeeze out the best possible cost performances.
@ -526,7 +526,7 @@ Keywords: Model deployment, CLoud, Mobile, Edge
## [underthesea](https://github.com/undertheseanlp/underthesea)
[underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules data sets and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing.
[underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules data sets and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing.
Keywords: Vietnamese, NLP
@ -606,3 +606,4 @@ Keywords: BentoML, Framework, Deployment, AI Applications
[LLaMA Factory](https://github.com/hiyouga/LLaMA-Factory) offers a user-friendly fine-tuning framework that incorporates PEFT. The repository includes training(fine-tuning) and inference examples for LLaMA-2, BLOOM, Falcon, Baichuan, Qwen, and other LLMs. A ChatGLM version is also available in [ChatGLM-Efficient-Tuning](https://github.com/hiyouga/ChatGLM-Efficient-Tuning).
Keywords: PEFT, fine-tuning, LLaMA-2, ChatGLM, Qwen

View File

@ -1 +0,0 @@
benchmark_results/

View File

@ -1,49 +0,0 @@
# Benchmarks
You might want to add new benchmarks.
You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory.
The expected function signature is the following:
```py
def run_benchmark(logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, metrics_recorder=None, num_tokens_to_generate=100):
```
## Writing metrics to the database
`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
cf [`llama.py`](./llama.py) to see an example of this in practice.
```py
from benchmarks_entrypoint import MetricsRecorder
import psycopg2
def run_benchmark(logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg)
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
# To collect device measurements
metrics_recorder.collect_device_measurements(
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
)
# To collect your model measurements
metrics_recorder.collect_model_measurements(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
)
```

View File

@ -1,353 +0,0 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from logging import Logger
from threading import Event, Thread
from time import perf_counter, sleep
# Add the parent directory to Python path to import benchmarks_entrypoint
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import gpustat
import psutil
import psycopg2
from benchmarks_entrypoint import MetricsRecorder
# Optional heavy ML dependencies: they are only needed when the benchmark is
# actually executed, so a missing install is recorded in a flag instead of
# failing at import time.
try:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
except ImportError:
    TRANSFORMERS_AVAILABLE = False
    # Bind every optional name so later references do not raise NameError.
    torch = AutoModelForCausalLM = AutoTokenizer = GenerationConfig = StaticCache = None
else:
    TRANSFORMERS_AVAILABLE = True

os.environ.update({"HF_XET_HIGH_PERFORMANCE": "1", "TOKENIZERS_PARALLELISM": "1"})

# The matmul precision can only be configured when torch was imported.
if TRANSFORMERS_AVAILABLE:
    torch.set_float32_matmul_precision("high")
def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
    """
    Sample process and GPU usage in a loop and hand each sample to the recorder.

    Args:
        benchmark_id: identifier forwarded unchanged to the recorder.
        continue_metric_collection: event polled before every sample; despite
            its name, the loop stops once the event is *set*.
        metrics_recorder: object exposing `collect_device_measurements`.
    """
    bytes_per_megabyte = 1024 * 1024
    process = psutil.Process(os.getpid())
    while True:
        if continue_metric_collection.is_set():
            break
        # Read both process counters inside a single oneshot() context.
        with process.oneshot():
            cpu_util = process.cpu_percent()
            rss_bytes = process.memory_info().rss
        # Only the GPU at index 0 is reported.
        first_gpu = gpustat.GPUStatCollection.new_query()[0]
        metrics_recorder.collect_device_measurements(
            benchmark_id,
            cpu_util,
            rss_bytes / bytes_per_megabyte,
            first_gpu["utilization.gpu"],
            first_gpu["memory.used"],
        )
        sleep(0.01)
def run_benchmark(
    logger: Logger,
    repository: str,
    branch: str,
    commit_id: str,
    commit_msg: str,
    metrics_recorder=None,
    num_tokens_to_generate=100,
):
    """
    Time model loading, eager forward/generate calls, single-token steps and
    compiled generation for `meta-llama/Llama-2-7b-hf`, and record the timings.

    A background thread samples device usage through `collect_metrics` while
    the measurements run on the calling thread.

    Args:
        logger: destination for progress and error messages.
        repository: forwarded to `MetricsRecorder` when one is created here.
        branch: forwarded to `MetricsRecorder` when one is created here.
        commit_id: forwarded to `MetricsRecorder` when one is created here.
        commit_msg: forwarded to `MetricsRecorder` when one is created here.
        metrics_recorder: recorder to write to. When `None`, a recorder backed
            by `psycopg2.connect("dbname=metrics")` is created and closed here.
        num_tokens_to_generate: added to the prompt length to obtain the
            generation `max_length`.

    Returns:
        None. Exceptions raised while benchmarking are logged, not re-raised.
    """
    # Check if required ML dependencies are available
    if not TRANSFORMERS_AVAILABLE:
        logger.error("Transformers and torch are required to run the LLaMA benchmark. Please install them with:")
        logger.error("pip install torch transformers")
        logger.error("Skipping LLaMA benchmark due to missing dependencies.")
        return

    continue_metric_collection = Event()
    metrics_thread = None
    model_id = "meta-llama/Llama-2-7b-hf"

    # If no metrics_recorder is provided, create one for backward compatibility
    if metrics_recorder is None:
        try:
            metrics_recorder = MetricsRecorder(
                psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True
            )
            should_close_recorder = True
        except Exception as e:
            logger.error(f"Failed to create metrics recorder: {e}")
            return
    else:
        should_close_recorder = False

    try:
        gpu_stats = gpustat.GPUStatCollection.new_query()
        gpu_name = gpu_stats[0]["name"]
        benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
        logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
        metrics_thread = Thread(
            target=collect_metrics,
            args=[benchmark_id, continue_metric_collection, metrics_recorder],
        )
        metrics_thread.start()
        logger.info("started background thread to fetch device metrics")

        os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling

        device = "cuda"

        logger.info("downloading weights")
        # This is to avoid counting download in model load time measurement
        model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
        gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
        logger.info("loading model")
        start = perf_counter()
        model = AutoModelForCausalLM.from_pretrained(
            model_id, dtype=torch.float16, generation_config=gen_config
        ).eval()
        model.to(device)
        torch.cuda.synchronize()
        end = perf_counter()
        model_load_time = end - start
        logger.info(f"loaded model in: {model_load_time}s")

        tokenizer = AutoTokenizer.from_pretrained(model_id)

        prompt = "Why dogs are so cute?"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # Specify the max length (including both the prompt and the response)
        # When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object
        # with sequence length = `max_length`. The longer the more you will re-use it
        seq_length = inputs["input_ids"].shape[1]
        model.generation_config.max_length = seq_length + num_tokens_to_generate
        batch_size = inputs["input_ids"].shape[0]

        # Copied from the gpt-fast repo
        def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a cuda synchronization
            q = torch.empty_like(probs_sort).exponential_(1)
            return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)

        def logits_to_probs(logits, temperature: float = 1.0, top_k: int | None = None):
            logits = logits / max(temperature, 1e-5)

            if top_k is not None:
                # Mask every logit below the k-th largest one before the softmax.
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                pivot = v.select(-1, -1).unsqueeze(-1)
                logits = torch.where(logits < pivot, -float("Inf"), logits)
            probs = torch.nn.functional.softmax(logits, dim=-1)
            return probs

        def sample(logits, temperature: float = 1.0, top_k: int | None = None):
            probs = logits_to_probs(logits[0, -1], temperature, top_k)
            idx_next = multinomial_sample_one_no_sync(probs)
            return idx_next, probs

        # First eager forward pass
        logger.info("running first eager forward pass")
        start = perf_counter()
        _ = model(**inputs)
        torch.cuda.synchronize()
        end = perf_counter()
        first_eager_fwd_pass_time = end - start
        logger.info(f"completed first eager forward pass in: {first_eager_fwd_pass_time}s")

        # Second eager forward pass (should be faster)
        logger.info("running second eager forward pass")
        start = perf_counter()
        _ = model(**inputs)
        torch.cuda.synchronize()
        end = perf_counter()
        second_eager_fwd_pass_time = end - start
        logger.info(f"completed second eager forward pass in: {second_eager_fwd_pass_time}s")

        # First eager generation
        logger.info("running first eager generation")
        start = perf_counter()
        output = model.generate(**inputs)
        torch.cuda.synchronize()
        end = perf_counter()
        first_eager_generate_time = end - start
        logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")

        # Second eager generation (should be faster)
        logger.info("running second eager generation")
        start = perf_counter()
        output = model.generate(**inputs)
        torch.cuda.synchronize()
        end = perf_counter()
        second_eager_generate_time = end - start
        logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")

        logger.info("running generation timing loop")

        input_pos = torch.arange(0, seq_length, device=device)
        inputs = inputs["input_ids"]

        start = perf_counter()
        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
            logits = model(inputs, position_ids=input_pos).logits
        next_token, probs = sample(logits, temperature=0.6, top_k=5)
        torch.cuda.synchronize()
        end = perf_counter()
        time_to_first_token = end - start

        input_pos = torch.tensor([seq_length], device=device, dtype=torch.int)
        next_token = next_token.clone()
        start = perf_counter()
        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
            logits = model(next_token, position_ids=input_pos).logits
        next_token, probs = sample(logits, temperature=0.6, top_k=5)
        torch.cuda.synchronize()
        end = perf_counter()
        time_to_second_token = end - start

        input_pos = torch.tensor([seq_length + 1], device=device, dtype=torch.int)
        next_token = next_token.clone()
        start = perf_counter()
        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
            logits = model(next_token, position_ids=input_pos).logits
        next_token, probs = sample(logits, temperature=0.6, top_k=5)
        torch.cuda.synchronize()
        end = perf_counter()
        time_to_third_token = end - start

        logger.info("running longer generation timing loop")

        total_time = 0
        for i in range(20):
            input_pos = torch.tensor([seq_length + 2 + i], device=device, dtype=torch.int)
            next_token = next_token.clone()
            start = perf_counter()
            with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
                logits = model(next_token, position_ids=input_pos).logits
            next_token, probs = sample(logits, temperature=0.6, top_k=5)
            torch.cuda.synchronize()
            end = perf_counter()
            total_time += end - start

        mean_time_to_next_token = total_time / 20

        logger.info("running compilation benchmarks")

        # Now compile the model
        model = torch.compile(model, mode="max-autotune", fullgraph=True)

        # StaticCache for generation
        # NOTE(review): `setup_caches` is not defined anywhere in this file -
        # confirm the loaded model really exposes it.
        with torch.device(device):
            model.setup_caches(max_batch_size=batch_size, max_seq_len=seq_length + num_tokens_to_generate)

        input_pos = torch.arange(0, seq_length, device=device)
        # NOTE(review): `inputs` is rebound to the `input_ids` entry here, yet it
        # is unpacked with `**inputs` in the generate calls below - confirm it is
        # a mapping at that point.
        inputs = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"]

        logger.info("compiling model")

        model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16, generation_config=gen_config)
        model.to(device)
        model = torch.compile(model, mode="max-autotune", fullgraph=True)

        # Four identical compiled generate calls; each one gets a fresh cache.
        compile_generate_times = {}
        for ordinal in ("first", "second", "third", "fourth"):
            past_key_values = StaticCache(
                model.config,
                max_batch_size=batch_size,
                device=device,
                dtype=torch.float16,
                max_cache_len=seq_length + 128,
            )
            start = perf_counter()
            output = model.generate(**inputs, past_key_values=past_key_values)
            end = perf_counter()
            compile_generate_times[ordinal] = end - start
            logger.info(f"completed {ordinal} compile generation in: {compile_generate_times[ordinal]}s")
            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")

        metrics_recorder.collect_model_measurements(
            benchmark_id,
            {
                "model_load_time": model_load_time,
                "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
                "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
                "first_eager_generate_time_secs": first_eager_generate_time,
                "second_eager_generate_time_secs": second_eager_generate_time,
                "time_to_first_token_secs": time_to_first_token,
                "time_to_second_token_secs": time_to_second_token,
                "time_to_third_token_secs": time_to_third_token,
                "time_to_next_token_mean_secs": mean_time_to_next_token,
                "first_compile_generate_time_secs": compile_generate_times["first"],
                "second_compile_generate_time_secs": compile_generate_times["second"],
                "third_compile_generate_time_secs": compile_generate_times["third"],
                "fourth_compile_generate_time_secs": compile_generate_times["fourth"],
            },
        )
    except Exception as e:
        logger.error(f"Caught exception: {e}")
    finally:
        # Runs for every exit path, including KeyboardInterrupt/SystemExit, so
        # the sampling thread is always stopped and joined.
        continue_metric_collection.set()
        if metrics_thread is not None:
            metrics_thread.join()

        # Only close the recorder if we created it locally
        if should_close_recorder:
            metrics_recorder.close()

View File

@ -31,7 +31,9 @@ from contextlib import contextmanager
from pathlib import Path
from git import Repo
from huggingface_hub import HfApi
from optimum_benchmark import Benchmark
from optimum_benchmark_wrapper import main
@ -88,7 +90,7 @@ def summarize(run_dir, metrics, expand_metrics=False):
model = benchmark.config.backend["model"]
# This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
# This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
# (we rely on the usage of hydra's `${hydra.job.override_dirname}`.)
benchmark_name = re.sub(f"backend.model={model},*", "", report_dir)
benchmark_name = str(Path(benchmark_name).parts[-1])

View File

@ -1,502 +0,0 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import importlib.util
import json
import logging
import os
import sys
import uuid
from datetime import datetime
import pandas as pd
try:
from psycopg2.extensions import register_adapter
from psycopg2.extras import Json
register_adapter(dict, Json)
PSYCOPG2_AVAILABLE = True
except ImportError:
PSYCOPG2_AVAILABLE = False
class ImportModuleException(Exception):
    """Custom exception type; adds no state or behaviour beyond `Exception`."""
class MetricsRecorder:
def __init__(
self,
connection,
logger: logging.Logger,
repository: str,
branch: str,
commit_id: str,
commit_msg: str,
collect_csv_data: bool = True,
):
self.conn = connection
self.use_database = connection is not None
if self.use_database:
self.conn.autocommit = True
self.logger = logger
self.repository = repository
self.branch = branch
self.commit_id = commit_id
self.commit_msg = commit_msg
self.collect_csv_data = collect_csv_data
# For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled)
if self.collect_csv_data:
# Initialize empty DataFrames with proper schemas
self.benchmarks_df = pd.DataFrame(
columns=[
"benchmark_id",
"repository",
"branch",
"commit_id",
"commit_message",
"metadata",
"created_at",
]
)
self.device_measurements_df = pd.DataFrame(
columns=["benchmark_id", "cpu_util", "mem_megabytes", "gpu_util", "gpu_mem_megabytes", "time"]
)
self.model_measurements_df = pd.DataFrame(
columns=[
"benchmark_id",
"time",
"model_load_time",
"first_eager_forward_pass_time_secs",
"second_eager_forward_pass_time_secs",
"first_eager_generate_time_secs",
"second_eager_generate_time_secs",
"time_to_first_token_secs",
"time_to_second_token_secs",
"time_to_third_token_secs",
"time_to_next_token_mean_secs",
"first_compile_generate_time_secs",
"second_compile_generate_time_secs",
"third_compile_generate_time_secs",
"fourth_compile_generate_time_secs",
]
)
else:
self.benchmarks_df = None
self.device_measurements_df = None
self.model_measurements_df = None
def initialise_benchmark(self, metadata: dict[str, str]) -> str:
"""
Creates a new benchmark, returns the benchmark id (UUID)
"""
# Generate a unique UUID for this benchmark
benchmark_id = str(uuid.uuid4())
if self.use_database:
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO benchmarks (benchmark_id, repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s, %s)",
(benchmark_id, self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
)
self.logger.debug(f"initialised benchmark #{benchmark_id}")
# Store benchmark data for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame
new_row = pd.DataFrame(
[
{
"benchmark_id": benchmark_id,
"repository": self.repository,
"branch": self.branch,
"commit_id": self.commit_id,
"commit_message": self.commit_msg,
"metadata": json.dumps(metadata),
"created_at": datetime.utcnow().isoformat(),
}
]
)
self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True)
mode_info = []
if self.use_database:
mode_info.append("database")
if self.collect_csv_data:
mode_info.append("CSV")
mode_str = " + ".join(mode_info) if mode_info else "no storage"
self.logger.debug(f"initialised benchmark #{benchmark_id} ({mode_str} mode)")
return benchmark_id
def collect_device_measurements(self, benchmark_id: str, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
"""
Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
"""
# Store device measurements for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame
new_row = pd.DataFrame(
[
{
"benchmark_id": benchmark_id,
"cpu_util": cpu_util,
"mem_megabytes": mem_megabytes,
"gpu_util": gpu_util,
"gpu_mem_megabytes": gpu_mem_megabytes,
"time": datetime.utcnow().isoformat(),
}
]
)
self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True)
# Store in database if available
if self.use_database:
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
)
self.logger.debug(
f"collected device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
)
def collect_model_measurements(self, benchmark_id: str, measurements: dict[str, float]):
# Store model measurements for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame with flattened measurements
row_data = {"benchmark_id": benchmark_id, "time": datetime.utcnow().isoformat()}
# Flatten the measurements dict into the row
row_data.update(measurements)
new_row = pd.DataFrame([row_data])
self.model_measurements_df = pd.concat([self.model_measurements_df, new_row], ignore_index=True)
# Store in database if available
if self.use_database:
with self.conn.cursor() as cur:
cur.execute(
"""
INSERT INTO model_measurements (
benchmark_id,
measurements
) VALUES (%s, %s)
""",
(
benchmark_id,
measurements,
),
)
self.logger.debug(f"collected model measurements for benchmark #{benchmark_id}: {measurements}")
def export_to_csv(self, output_dir: str = "benchmark_results"):
"""
Export all collected data to CSV files using pandas DataFrames
"""
if not self.collect_csv_data:
self.logger.warning("CSV data collection is disabled - no CSV files will be generated")
return
if not os.path.exists(output_dir):
os.makedirs(output_dir)
self.logger.info(f"Created output directory: {output_dir}")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
files_created = []
# Export using pandas DataFrames
self._export_pandas_data(output_dir, timestamp, files_created)
self.logger.info(f"CSV export complete! Created {len(files_created)} files in {output_dir}")
def _export_pandas_data(self, output_dir: str, timestamp: str, files_created: list):
"""
Export CSV files using pandas DataFrames
"""
# Export benchmarks
benchmarks_file = os.path.join(output_dir, f"benchmarks_{timestamp}.csv")
self.benchmarks_df.to_csv(benchmarks_file, index=False)
files_created.append(benchmarks_file)
self.logger.info(f"Exported {len(self.benchmarks_df)} benchmark records to {benchmarks_file}")
# Export device measurements
device_file = os.path.join(output_dir, f"device_measurements_{timestamp}.csv")
self.device_measurements_df.to_csv(device_file, index=False)
files_created.append(device_file)
self.logger.info(f"Exported {len(self.device_measurements_df)} device measurement records to {device_file}")
# Export model measurements (already flattened)
model_file = os.path.join(output_dir, f"model_measurements_{timestamp}.csv")
self.model_measurements_df.to_csv(model_file, index=False)
files_created.append(model_file)
self.logger.info(f"Exported {len(self.model_measurements_df)} model measurement records to {model_file}")
# Create comprehensive summary using pandas operations
summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.csv")
self._create_summary(summary_file)
files_created.append(summary_file)
def _create_summary(self, summary_file: str):
"""
Create a comprehensive summary CSV using pandas operations
"""
if len(self.benchmarks_df) == 0:
# Create empty summary file
summary_df = pd.DataFrame()
summary_df.to_csv(summary_file, index=False)
self.logger.info(f"Created empty benchmark summary at {summary_file}")
return
# Start with benchmarks as the base
summary_df = self.benchmarks_df.copy()
# Add model measurements (join on benchmark_id)
if len(self.model_measurements_df) > 0:
# Drop 'time' column from model measurements to avoid conflicts
model_df = self.model_measurements_df.drop(columns=["time"], errors="ignore")
summary_df = summary_df.merge(model_df, on="benchmark_id", how="left")
# Calculate device measurement aggregates using pandas groupby
if len(self.device_measurements_df) > 0:
device_agg = (
self.device_measurements_df.groupby("benchmark_id")
.agg(
{
"cpu_util": ["mean", "max", "std", "count"],
"mem_megabytes": ["mean", "max", "std"],
"gpu_util": ["mean", "max", "std"],
"gpu_mem_megabytes": ["mean", "max", "std"],
}
)
.round(3)
)
# Flatten column names
device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns]
device_agg = device_agg.reset_index()
# Rename count column to be more descriptive
if "cpu_util_count" in device_agg.columns:
device_agg = device_agg.rename(columns={"cpu_util_count": "device_measurement_count"})
# Merge with summary
summary_df = summary_df.merge(device_agg, on="benchmark_id", how="left")
# Export the comprehensive summary
summary_df.to_csv(summary_file, index=False)
self.logger.info(f"Created comprehensive benchmark summary with {len(summary_df)} records at {summary_file}")
def close(self):
    """Close the database connection, but only when database storage is on and a connection exists."""
    if not self.use_database:
        return
    if not self.conn:
        return
    self.conn.close()
# Module-wide logger: INFO and above, written to stdout as "[LEVEL - timestamp] message".
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
handler.setLevel(logging.INFO)
logger = logging.getLogger(__name__)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
def parse_arguments() -> tuple[str, str, str, str, bool, str]:
    """
    Parse command line arguments for the benchmarking CLI.

    Returns:
        `(repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir)`, where
        `generate_csv` is True only when `--csv` was passed.
    """
    cli = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")

    # The four required positionals, in the order they must appear on the command line
    positionals = (
        ("repository", "The repository name on which the benchmarking is performed."),
        ("branch", "The branch name on which the benchmarking is performed."),
        ("commit_id", "The commit hash on which the benchmarking is performed."),
        ("commit_msg", "The commit message associated with the commit, truncated to 70 characters."),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, type=str, help=help_text)

    # CSV is disabled by default, only enabled when --csv is used
    cli.add_argument("--csv", action="store_true", default=False, help="Enable CSV output files generation.")
    cli.add_argument(
        "--csv-output-dir",
        type=str,
        default="benchmark_results",
        help="Directory for CSV output files (default: benchmark_results).",
    )

    parsed = cli.parse_args()
    return (
        parsed.repository,
        parsed.branch,
        parsed.commit_id,
        parsed.commit_msg,
        parsed.csv,
        parsed.csv_output_dir,
    )
def import_from_path(module_name, file_path):
    """
    Load the Python file at `file_path` as a module registered under `module_name`.

    The module is placed in `sys.modules` before it is executed. If loading fails, that
    registration is rolled back (any module previously registered under the same name is
    restored) so a half-initialised module never stays importable.

    Args:
        module_name: Name to register the module under in `sys.modules`.
        file_path: Path of the Python source file to execute.

    Returns:
        The loaded module object.

    Raises:
        ImportModuleException: If the spec cannot be built or the module fails to execute;
            the original error is attached as `__cause__`.
    """
    try:
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        if spec is None or spec.loader is None:
            # spec_from_file_location returns None when it finds no loader for the path
            raise ImportError(f"cannot build an import spec for {file_path!r}")

        module = importlib.util.module_from_spec(spec)

        not_registered = object()
        previous = sys.modules.get(module_name, not_registered)
        sys.modules[module_name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Undo the registration so later imports don't pick up a broken module
            if previous is not_registered:
                sys.modules.pop(module_name, None)
            else:
                sys.modules[module_name] = previous
            raise
        return module
    except Exception as e:
        raise ImportModuleException(f"failed to load python module: {e}") from e
def create_database_connection():
    """
    Open a connection to the `metrics` database on a best-effort basis.

    Returns:
        The psycopg2 connection, or None when psycopg2 is unavailable or connecting fails
        (both cases are logged as warnings and leave the run in CSV-only mode).
    """
    if PSYCOPG2_AVAILABLE:
        try:
            import psycopg2

            connection = psycopg2.connect("dbname=metrics")
            logger.info("Successfully connected to database")
            return connection
        except Exception as e:
            logger.warning(f"Failed to connect to database: {e}. Running in CSV-only mode")
            return None

    logger.warning("psycopg2 not available - running in CSV-only mode")
    return None
def create_global_metrics_recorder(
    repository: str, branch: str, commit_id: str, commit_msg: str, generate_csv: bool = False
) -> MetricsRecorder:
    """
    Build the single MetricsRecorder shared by every benchmark of this run.

    A database connection is attempted first (it may be None), then the recorder is created
    and the resulting storage mode is logged.

    Returns:
        The recorder; it is returned even when neither database nor CSV storage is active.
    """
    connection = create_database_connection()
    recorder = MetricsRecorder(connection, logger, repository, branch, commit_id, commit_msg, generate_csv)

    # Name every storage backend that is actually active, for the log line below
    backends = (("database", connection is not None), ("CSV", generate_csv))
    storage_modes = [label for label, active in backends if active]

    if storage_modes:
        logger.info(f"Running benchmarks with: {' + '.join(storage_modes)} storage")
    else:
        logger.warning("Running benchmarks with NO data storage (no database connection, CSV disabled)")
        logger.warning("Use --csv flag to enable CSV output when database is unavailable")

    return recorder
if __name__ == "__main__":
    # Script entry point: discover every benches/*.py module exposing `run_benchmark`, run
    # each one against a shared metrics recorder, then optionally export CSV reports.

    # Local import: only the entry point needs signature introspection.
    import inspect

    benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
    benches_folder_path = os.path.join(benchmarks_folder_path, "benches")

    repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir = parse_arguments()

    # Create a global metrics recorder
    global_metrics_recorder = create_global_metrics_recorder(repository, branch, commit_id, commit_msg, generate_csv)

    successful_benchmarks = 0
    failed_benchmarks = 0

    # Automatically discover all benchmark modules in benches/ folder
    benchmark_modules = []

    if os.path.exists(benches_folder_path):
        logger.debug(f"Scanning for benchmarks in: {benches_folder_path}")
        for entry in os.scandir(benches_folder_path):
            if not entry.name.endswith(".py"):
                continue
            if entry.name.startswith("__"):  # Skip __init__.py, __pycache__, etc.
                continue

            # Check if the file has a run_benchmark function
            try:
                logger.debug(f"checking if benches/{entry.name} has run_benchmark function")
                module = import_from_path(entry.name.split(".")[0], entry.path)
                if hasattr(module, "run_benchmark"):
                    benchmark_modules.append(entry.name)
                    logger.debug(f"discovered benchmark: {entry.name}")
                else:
                    logger.debug(f"skipping {entry.name} - no run_benchmark function found")
            except Exception as e:
                # Logged as a warning: at DEBUG this is hidden by the INFO-level logger and a
                # benchmark that fails to import would be dropped without any trace.
                logger.warning(f"failed to check benches/{entry.name}: {e}")
    else:
        logger.warning(f"Benches directory not found: {benches_folder_path}")

    if benchmark_modules:
        logger.info(f"Discovered {len(benchmark_modules)} benchmark(s): {benchmark_modules}")
    else:
        logger.warning("No benchmark modules found in benches/ directory")

    for module_name in benchmark_modules:
        module_path = os.path.join(benches_folder_path, module_name)
        try:
            logger.debug(f"loading: {module_name}")
            module = import_from_path(module_name.split(".")[0], module_path)
            logger.info(f"running benchmarks in: {module_name}")

            # Decide up front which signature the module implements. Catching TypeError around
            # the call itself would also swallow a TypeError raised *inside* a new-style
            # benchmark and then run it a second time with the old argument list.
            new_style_args = (logger, repository, branch, commit_id, commit_msg, global_metrics_recorder)
            try:
                inspect.signature(module.run_benchmark).bind(*new_style_args)
                accepts_recorder = True
            except TypeError:
                accepts_recorder = False
            except ValueError:
                # Signature not introspectable: attempt the new signature
                accepts_recorder = True

            if accepts_recorder:
                module.run_benchmark(*new_style_args)
            else:
                # Fall back to the old signature for backward compatibility
                logger.warning(
                    f"Module {module_name} using old run_benchmark signature - database connection will be created per module"
                )
                module.run_benchmark(logger, repository, branch, commit_id, commit_msg)

            successful_benchmarks += 1
        except ImportModuleException as e:
            logger.error(e)
            failed_benchmarks += 1
        except Exception as e:
            logger.error(f"error running benchmarks for {module_name}: {e}")
            failed_benchmarks += 1

    # Export CSV results at the end (if enabled)
    try:
        if generate_csv:
            global_metrics_recorder.export_to_csv(csv_output_dir)
            logger.info(f"CSV reports have been generated and saved to the {csv_output_dir} directory")
        else:
            logger.info("CSV generation disabled - no CSV files created (use --csv to enable)")
    except Exception as e:
        logger.error(f"Failed to export CSV results: {e}")
    finally:
        # Report the outcome even when the CSV export failed, then release the connection
        logger.info(f"Benchmark run completed. Successful: {successful_benchmarks}, Failed: {failed_benchmarks}")
        global_metrics_recorder.close()

View File

@ -19,7 +19,7 @@ backend:
model: meta-llama/Llama-2-7b-hf
cache_implementation: static
torch_compile: true
dtype: float16
torch_dtype: float16
torch_compile_config:
backend: inductor
mode: reduce-overhead

View File

@ -1,10 +0,0 @@
apiVersion: 1
providers:
- name: 'Transformers Benchmarks'
orgId: 1
type: file
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/dashboards

View File

@ -30,7 +30,7 @@
"title": "Go to data",
"tooltip": "Go to data",
"type": "link",
"url": "http://transformers-benchmarks.hf.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
"url": "http://transformers-benchmarks.huggingface.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
}
],
"liveNow": true,
@ -77,7 +77,7 @@
"properties": [
{
"id": "custom.width",
"value": 202
"value": 196
}
]
},
@ -101,7 +101,7 @@
"properties": [
{
"id": "custom.width",
"value": 524
"value": 581
}
]
},
@ -113,19 +113,7 @@
"properties": [
{
"id": "custom.width",
"value": 353
}
]
},
{
"matcher": {
"id": "byName",
"options": "model_id"
},
"properties": [
{
"id": "custom.width",
"value": 216
"value": 379
}
]
}
@ -155,14 +143,12 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"type": "grafana-postgresql-datasource"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT commit_id, commit_message, metadata->>'gpu_name' as gpu_name, metadata->>'model_id' as model_id, created_at AS date FROM benchmarks WHERE branch = '${branch}' AND metadata->>'gpu_name' = '${gpu_name}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT commit_id as commit_id, commit_message, gpu_name, created_at AS date FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -320,14 +306,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -446,14 +431,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -581,14 +565,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -703,14 +686,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -825,14 +807,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -947,14 +928,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1082,14 +1062,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1204,14 +1183,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1326,14 +1304,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1448,14 +1425,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1504,7 +1480,11 @@
"id": 15,
"panels": [
{
"datasource": {},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": {
"defaults": {
"color": {
@ -1548,7 +1528,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1582,9 +1563,8 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
@ -1685,7 +1665,11 @@
"type": "timeseries"
},
{
"datasource": {},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": {
"defaults": {
"color": {
@ -1729,7 +1713,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1763,9 +1748,8 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
@ -1866,7 +1850,11 @@
"type": "timeseries"
},
{
"datasource": {},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": {
"defaults": {
"color": {
@ -1910,7 +1898,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1944,9 +1933,8 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
@ -2047,7 +2035,11 @@
"type": "timeseries"
},
{
"datasource": {},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": {
"defaults": {
"color": {
@ -2091,7 +2083,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -2125,9 +2118,8 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
@ -2232,6 +2224,7 @@
"type": "row"
}
],
"refresh": "",
"schemaVersion": 39,
"tags": [],
"templating": {
@ -2243,7 +2236,6 @@
"value": "main"
},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
@ -2256,7 +2248,7 @@
"name": "branch",
"options": [],
"query": "SELECT DISTINCT branch FROM benchmarks;",
"refresh": 1,
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
@ -2269,7 +2261,6 @@
"value": "1729701492845"
},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
@ -2290,11 +2281,10 @@
{
"current": {
"selected": false,
"text": "1730393397577",
"value": "1730393397577"
"text": "1730120430069",
"value": "1730120430069"
},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
@ -2322,16 +2312,15 @@
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"definition": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
"description": "",
"definition": "SELECT DISTINCT gpu_name FROM benchmarks;",
"hide": 0,
"includeAll": false,
"label": "GPU",
"multi": false,
"name": "gpu_name",
"options": [],
"query": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
"refresh": 1,
"query": "SELECT DISTINCT gpu_name FROM benchmarks;",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
@ -2339,7 +2328,7 @@
},
{
"current": {
"selected": true,
"selected": false,
"text": "10",
"value": "10"
},
@ -2370,6 +2359,6 @@
"timezone": "browser",
"title": "Transformers benchmarks",
"uid": "fdz33iyzln9c0a",
"version": 10,
"version": 4,
"weekStart": ""
}

View File

@ -1,17 +0,0 @@
apiVersion: 1
datasources:
- name: grafana-postgresql-datasource
uid: be28nkzirtb0gd
type: postgres
url: $GRAFANA_POSTGRES_DATASOURCE_URL
user: $GRAFANA_POSTGRES_DATASOURCE_USER
secureJsonData:
password: $GRAFANA_POSTGRES_DATASOURCE_PWD
jsonData:
database: metrics
maxOpenConns: 100
maxIdleConns: 100
maxIdleConnsAuto: true
connMaxLifetime: 14400
postgresVersion: 1000
timescaledb: false

33
benchmark/init_db.sql Normal file
View File

@ -0,0 +1,33 @@
-- Schema for the benchmark metrics database. Every statement uses IF NOT EXISTS, so the
-- script can be re-run against an already initialised database.

-- One row per benchmark run: which branch/commit was measured and on which GPU.
CREATE TABLE IF NOT EXISTS benchmarks (
benchmark_id SERIAL PRIMARY KEY,
branch VARCHAR(255),
commit_id VARCHAR(72),
-- 70 characters: the benchmarking CLI documents commit_msg as truncated to 70 characters
commit_message VARCHAR(70),
gpu_name VARCHAR(255),
created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
);
-- NOTE(review): benchmark_id is already the PRIMARY KEY, which presumably gives it an index
-- of its own - confirm whether this extra index is needed.
CREATE INDEX IF NOT EXISTS benchmarks_benchmark_id_idx ON benchmarks (benchmark_id);
CREATE INDEX IF NOT EXISTS benchmarks_branch_idx ON benchmarks (branch);
-- Device (CPU / memory / GPU) samples, each tied to a benchmark run.
CREATE TABLE IF NOT EXISTS device_measurements (
measurement_id SERIAL PRIMARY KEY,
benchmark_id int REFERENCES benchmarks (benchmark_id),
cpu_util double precision,
mem_megabytes double precision,
gpu_util double precision,
gpu_mem_megabytes double precision,
time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
);
-- NOTE(review): despite the "_branch_idx" name, this index is on benchmark_id.
CREATE INDEX IF NOT EXISTS device_measurements_branch_idx ON device_measurements (benchmark_id);
-- Model-level measurements for a benchmark run, stored as one JSON document per row.
CREATE TABLE IF NOT EXISTS model_measurements (
measurement_id SERIAL PRIMARY KEY,
benchmark_id int REFERENCES benchmarks (benchmark_id),
measurements jsonb,
time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
);
-- NOTE(review): despite the "_branch_idx" name, this index is on benchmark_id.
CREATE INDEX IF NOT EXISTS model_measurements_branch_idx ON model_measurements (benchmark_id);

408
benchmark/llama.py Normal file
View File

@ -0,0 +1,408 @@
import argparse
import json
import logging
import os
import sys
from statistics import mean
from threading import Event, Thread
from time import perf_counter, sleep
from typing import Optional
import gpustat
import psutil
import psycopg2
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
from psycopg2.extras import Json
from psycopg2.extensions import register_adapter
# Module-level setup executed on import: environment flags, logging, torch and psycopg2 configuration.

# NOTE(review): this is assigned after `transformers` has already been imported above; if the
# hub library reads HF_HUB_ENABLE_HF_TRANSFER at import time the flag has no effect here -
# confirm, and move it above the imports if so.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# Logger for this script: INFO and above, written to stdout as "[LEVEL - timestamp] message"
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
# run_benchmark later overrides this with "false" to silence warnings when compiling
os.environ["TOKENIZERS_PARALLELISM"] = "1"
torch.set_float32_matmul_precision("high")
# Have psycopg2 adapt plain dicts through its Json wrapper when they are passed as query
# parameters (presumably for the jsonb `measurements` column - confirm against the inserts)
register_adapter(dict, Json)
def parse_arguments():
    """
    Parse command line arguments for the benchmarking CLI.

    Returns:
        `(branch, commit_id, commit_msg)` exactly as given on the command line.
    """
    cli = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")

    # Three required positionals, registered in command-line order
    for arg_name, help_text in (
        ("branch", "The branch name on which the benchmarking is performed."),
        ("commit_id", "The commit hash on which the benchmarking is performed."),
        ("commit_msg", "The commit message associated with the commit, truncated to 70 characters."),
    ):
        cli.add_argument(arg_name, type=str, help=help_text)

    parsed = cli.parse_args()
    return parsed.branch, parsed.commit_id, parsed.commit_msg
def collect_metrics(benchmark_id, continue_metric_collection):
    """
    Sample process and GPU usage in a loop and store the samples for `benchmark_id`.

    Meant to run in a background thread. Each iteration records this process's CPU
    utilisation and resident memory (in MiB) plus the utilisation and used memory reported
    for GPU 0, inserts one `device_measurements` row, then sleeps 10 ms. All rows are
    committed once, after the loop ends.

    Args:
        benchmark_id: Id of the `benchmarks` row the samples belong to.
        continue_metric_collection: Event polled on every iteration. Despite its name,
            sampling continues only while the event is NOT set; setting it stops the loop.

    Note:
        If sampling or an insert raises, the exception propagates and nothing is committed,
        but the database connection is still closed.
    """
    p = psutil.Process(os.getpid())
    conn = psycopg2.connect("dbname=metrics")
    try:
        cur = conn.cursor()
        while not continue_metric_collection.is_set():
            with p.oneshot():
                cpu_util = p.cpu_percent()
                mem_megabytes = p.memory_info().rss / (1024 * 1024)
                gpu_stats = gpustat.GPUStatCollection.new_query()
                gpu_util = gpu_stats[0]["utilization.gpu"]
                gpu_mem_megabytes = gpu_stats[0]["memory.used"]
                cur.execute(
                    "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
                    (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
                )
            sleep(0.01)
        conn.commit()
    finally:
        # Always release the connection, including when a sample or insert fails
        conn.close()
def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
continue_metric_collection = Event()
metrics_thread = None
try:
gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_name = gpu_stats[0]["name"]
conn = psycopg2.connect("dbname=metrics")
cur = conn.cursor()
cur.execute(
"INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
(branch, commit_id, commit_msg, gpu_name),
)
conn.commit()
benchmark_id = cur.fetchone()[0]
logger.info(f"running benchmark #{benchmark_id} on {gpu_name}")
metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
metrics_thread.start()
logger.info("started background thread to fetch device metrics")
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
device = "cuda"
ckpt = "meta-llama/Llama-2-7b-hf"
logger.info("downloading weights")
# This is to avoid counting download in model load time measurement
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
logger.info("loading model")
start = perf_counter()
model = AutoModelForCausalLM.from_pretrained(
ckpt, torch_dtype=torch.float16, generation_config=gen_config
).eval()
model.to(device)
torch.cuda.synchronize()
end = perf_counter()
model_load_time = end - start
logger.info(f"loaded model in: {model_load_time}s")
tokenizer = AutoTokenizer.from_pretrained(ckpt)
prompt = "Why dogs are so cute?"
inputs = tokenizer(prompt, return_tensors="pt").to(device)
# Specify the max length (including both the prompt and the response)
# When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object
# with sequence length = `max_length`. The longer the more you will re-use it
seq_length = inputs["input_ids"].shape[1]
model.generation_config.max_length = seq_length + num_tokens_to_generate
batch_size = inputs["input_ids"].shape[0]
# Copied from the gpt-fast repo
def multinomial_sample_one_no_sync(probs_sort): # Does multinomial sampling without a cuda synchronization
q = torch.empty_like(probs_sort).exponential_(1)
return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
logits = logits / max(temperature, 1e-5)
if top_k is not None:
v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
pivot = v.select(-1, -1).unsqueeze(-1)
logits = torch.where(logits < pivot, -float("Inf"), logits)
probs = torch.nn.functional.softmax(logits, dim=-1)
return probs
def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
probs = logits_to_probs(logits[:, -1], temperature, top_k)
idx_next = multinomial_sample_one_no_sync(probs)
return idx_next, probs
def decode_one_token(model, cur_token, cache_position, past_key_values):
logits = model(
cur_token,
cache_position=cache_position,
past_key_values=past_key_values,
return_dict=False,
use_cache=True,
)[0]
new_token = sample(logits, temperature=0.6, top_k=5)[0]
return new_token
#########
# Eager #
#########
with torch.no_grad():
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + num_tokens_to_generate,
)
cache_position = torch.arange(seq_length, device=device)
start = perf_counter()
model(
**inputs,
cache_position=cache_position,
past_key_values=past_key_values,
return_dict=False,
use_cache=True,
)
end = perf_counter()
first_eager_fwd_pass_time = end - start
logger.info(f"completed first eager fwd pass in: {first_eager_fwd_pass_time}s")
start = perf_counter()
output = model.generate(**inputs, do_sample=False)
end = perf_counter()
first_eager_generate_time = end - start
logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + num_tokens_to_generate,
)
cache_position = torch.arange(seq_length, device=device)
start = perf_counter()
model(
**inputs,
cache_position=cache_position,
past_key_values=past_key_values,
return_dict=False,
use_cache=True,
)
end = perf_counter()
second_eager_fwd_pass_time = end - start
logger.info(f"completed second eager fwd pass in: {second_eager_fwd_pass_time}s")
start = perf_counter()
model.generate(**inputs, do_sample=False)
end = perf_counter()
second_eager_generate_time = end - start
logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
torch.compiler.reset()
################
# Forward pass #
################
# `torch.compile(model, ...)` is not recommended as you compile callbacks
# and full generate. We recommend compiling only the forward for now.
# "reduce-overhead" will use cudagraphs.
generated_ids = torch.zeros(
(batch_size, num_tokens_to_generate + seq_length), dtype=torch.int, device=device
)
generated_ids[:, :seq_length] = inputs["input_ids"]
decode_one_token = torch.compile(decode_one_token, mode="reduce-overhead", fullgraph=True)
# model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
# TODO use decode_one_token(model, input_id.clone(), cache_position) for verification
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + num_tokens_to_generate + 10,
)
cache_position = torch.arange(seq_length, device=device)
all_generated_tokens = []
### First compile, prefill
start = perf_counter()
next_token = decode_one_token(
model, inputs["input_ids"], cache_position=cache_position, past_key_values=past_key_values
)
torch.cuda.synchronize()
end = perf_counter()
time_to_first_token = end - start
logger.info(f"completed first compile generation in: {time_to_first_token}s")
cache_position += 1
all_generated_tokens += next_token.clone().detach().cpu().tolist()
cache_position = torch.tensor([seq_length], device=device)
### First compile, decoding
start = perf_counter()
next_token = decode_one_token(
model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
)
torch.cuda.synchronize()
end = perf_counter()
time_to_second_token = end - start
logger.info(f"completed second compile generation in: {time_to_first_token}s")
cache_position += 1
all_generated_tokens += next_token.clone().detach().cpu().tolist()
### Second compile, decoding
start = perf_counter()
next_token = decode_one_token(
model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
)
torch.cuda.synchronize()
end = perf_counter()
time_to_third_token = end - start
logger.info(f"completed third compile forward in: {time_to_first_token}s")
cache_position += 1
all_generated_tokens += next_token.clone().detach().cpu().tolist()
### Using cuda graphs decoding
start = perf_counter()
for _ in range(1, num_tokens_to_generate):
all_generated_tokens += next_token.clone().detach().cpu().tolist()
next_token = decode_one_token(
model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
)
cache_position += 1
torch.cuda.synchronize()
end = perf_counter()
mean_time_to_next_token = (end - start) / num_tokens_to_generate
logger.info(f"completed next compile generation in: {mean_time_to_next_token}s")
logger.info(f"generated: {tokenizer.batch_decode(all_generated_tokens)}")
####################
# Generate compile #
####################
torch.compiler.reset()
# we will not compile full generate as it's too intensive, though we measure the full forward!
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + 128,
)
# 1st call
start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
torch.cuda.synchronize()
end = perf_counter()
first_compile_generate_time = end - start
logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + 128,
)
# 2nd call
start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
torch.cuda.synchronize()
end = perf_counter()
second_compile_generate_time = end - start
logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + 128,
)
# 3rd call
start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
end = perf_counter()
third_compile_generate_time = end - start
logger.info(f"completed second compile generation in: {third_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + 128,
)
# 4th call
start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
end = perf_counter()
fourth_compile_generate_time = end - start
logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
cur.execute(
"""
INSERT INTO model_measurements (
benchmark_id,
measurements
) VALUES (%s, %s)
""",
(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
),
)
conn.commit()
conn.close()
except Exception as e:
logger.error(f"Caught exception: {e}")
continue_metric_collection.set()
if metrics_thread is not None:
metrics_thread.join()
if __name__ == "__main__":
branch, commit_id, commit_msg = parse_arguments()
run_benchmark(branch, commit_id, commit_msg, num_tokens_to_generate=20)

View File

@ -3,11 +3,7 @@ import subprocess
def main(config_dir, config_name, args):
subprocess.run(
["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"]
+ ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"]
+ args
)
subprocess.run(["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"] + ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"] + args)
if __name__ == "__main__":

View File

@ -2,5 +2,4 @@ gpustat==1.1.1
psutil==6.0.0
psycopg2==2.9.9
torch>=2.4.0
hf_xet
pandas>=1.5.0
hf_transfer

View File

@ -1,2 +0,0 @@
benchmark_results/
benchmark_results_profiles/

View File

@ -1,138 +0,0 @@
# Benchmarking v2
A comprehensive benchmarking framework for transformer models that supports multiple execution modes (eager, compiled, kernelized), detailed performance metrics collection, and structured output format.
## Quick Start
### Running All Benchmarks
```bash
# Run all benchmarks with default settings
python run_benchmarks.py
# Specify output directory
python run_benchmarks.py --output-dir my_results
# Run with custom parameters
python run_benchmarks.py \
--warmup-iterations 5 \
--measurement-iterations 10 \
--num-tokens-to-generate 200
```
### Uploading Results to HuggingFace Dataset
You can automatically upload benchmark results to a HuggingFace Dataset for tracking and analysis:
```bash
# Upload to a public dataset with auto-generated run ID
python run_benchmarks.py --upload-to-hub username/benchmark-results
# Upload with a custom run ID for easy identification
python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1
# Upload with custom HuggingFace token (if not set in environment)
python run_benchmarks.py --upload-to-hub username/benchmark-results --token hf_your_token_here
```
**Dataset Directory Structure:**
```
dataset_name/
├── 2025-01-15/
│ ├── runs/ # Non-scheduled runs (manual, PR, etc.)
│ │ └── 123-1245151651/ # GitHub run number and ID
│ │ └── benchmark_results/
│ │ ├── benchmark_summary_20250115_143022.json
│ │ └── model-name/
│ │ └── model-name_benchmark_20250115_143022.json
│ └── benchmark_results_abc123de/ # Scheduled runs (daily CI)
│ ├── benchmark_summary_20250115_143022.json
│ └── model-name/
│ └── model-name_benchmark_20250115_143022.json
└── 2025-01-16/
└── ...
```
**Authentication for Uploads:**
For uploading results, you need a HuggingFace token with write permissions to the target dataset. You can provide the token in several ways (in order of precedence):
1. Command line: `--token hf_your_token_here`
2. Environment variable: `HF_TOKEN`
### Running Specific Benchmarks
```bash
# Include only specific benchmarks
python run_benchmarks.py --include llama
# Exclude specific benchmarks
python run_benchmarks.py --exclude old_benchmark
```
## Output Format
Results are saved as JSON files with the following structure:
```json
{
"model_name": "llama_2_7b",
"benchmark_scenarios": [
{
"scenario_name": "eager_variant",
"metadata": {
"timestamp": "2025-01-XX...",
"commit_id": "abc123...",
"hardware_info": {
"gpu_name": "NVIDIA A100",
"gpu_memory_total": 40960,
"cpu_count": 64
},
"config": {
"variant": "eager",
"warmup_iterations": 3,
"measurement_iterations": 5
}
},
"measurements": {
"latency": {
"mean": 2.45,
"median": 2.43,
"std": 0.12,
"min": 2.31,
"max": 2.67,
"p95": 2.61,
"p99": 2.65
},
"time_to_first_token": {
"mean": 0.15,
"std": 0.02
},
"tokens_per_second": {
"mean": 87.3,
"unit": "tokens/sec"
}
},
"gpu_metrics": {
"gpu_utilization_mean": 85.2,
"gpu_memory_used_mean": 12450
}
}
]
}
```
### Debug Mode
```bash
python run_benchmarks.py --log-level DEBUG
```
## Contributing
To add new benchmarks:
1. Create a new file in `benches/`
2. Implement the `ModelBenchmark` interface
3. Add a runner function (`run_<benchmark_name>` or `run_benchmark`)
4. Run `python run_benchmarks.py` to execute the new benchmark together with the existing ones

View File

@ -1,214 +0,0 @@
import hashlib
import json
import logging
from typing import Any
KERNELIZATION_AVAILABLE = False
try:
from kernels import Mode, kernelize # noqa: F401
KERNELIZATION_AVAILABLE = True
except ImportError:
pass
logger = logging.getLogger(__name__)
class BenchmarkConfig:
"""Configuration for a single benchmark scenario."""
def __init__(
self,
warmup_iterations: int = 5,
measurement_iterations: int = 20,
gpu_monitoring: bool = True, # NOTE: you may want to disable this at times as we have obsvered it could heavily slow down benchmarks on AMD
batch_size: int = 1,
sequence_length: int = 128,
num_tokens_to_generate: int = 128,
attn_implementation: str = "eager",
sdpa_backend: str | None = None,
compile_mode: str | None = None,
compile_options: dict[str, Any] | None = None,
kernelize: bool = False,
name: str | None = None,
skip_validity_check: bool = False,
) -> None:
# Benchmark parameters
self.warmup_iterations = warmup_iterations
self.measurement_iterations = measurement_iterations
self.gpu_monitoring = gpu_monitoring
# Input parameters
self.batch_size = batch_size
self.sequence_length = sequence_length
self.num_tokens_to_generate = num_tokens_to_generate
# Generation parameters
self.attn_implementation = attn_implementation
self.sdpa_backend = sdpa_backend
# Optimization parameters
self.compile_mode = compile_mode
self.compile_options = compile_options if compile_options is not None else {}
self.kernelize = kernelize
# Constant parameters
self.dtype = "torch.bfloat16"
self.device = "cuda"
self.check_validity(skip_validity_check)
self.name = name if name is not None else self.infer_name()
def check_validity(self, skip_validity_check: bool = False) -> None:
if skip_validity_check:
return
# Flash attention does not support compile mode, so we turn it off # FIXME: it would be better to support it
is_fa = self.attn_implementation == "flash_attention_2"
is_fa |= self.attn_implementation == "sdpa" and self.sdpa_backend == "flash_attention"
if is_fa:
logger.warning("Flash attention does not support compile mode. Turning off compile mode.")
self.compile_mode = None
@property
def hash(self) -> str:
return hashlib.sha256(json.dumps(self.to_dict()).encode()).hexdigest()
def infer_name(self, compact: bool = True) -> str:
"""Infer a human-readable name for the benchmark config, either compact or verbose."""
if compact:
iter_str = f"w{self.warmup_iterations}_i{self.measurement_iterations}"
gpu_monitor_str = "monitored" if self.gpu_monitoring else "unmonitored"
dimensions_str = f"b{self.batch_size}_s{self.sequence_length}_n{self.num_tokens_to_generate}"
attn_code = self.attn_implementation
attn_code += f"_{self.sdpa_backend}" if self.attn_implementation == "sdpa" else ""
compile_str = f"compiled_{self.compile_mode}" if self.compile_mode is not None else "uncompiled"
kernelize_str = "kernelized" if self.kernelize else "unkernelized"
sep = "-"
else:
iter_str = f"{self.warmup_iterations} warmup, {self.measurement_iterations} iterations"
gpu_monitor_str = ("with" if self.gpu_monitoring else "no") + " GPU monitoring"
dimensions_str = f"batch size {self.batch_size}, sequence length {self.sequence_length}, {self.num_tokens_to_generate} generated tokens"
attn_code = f"{self.attn_implementation} attention"
attn_code += f" with {self.sdpa_backend} backend" if self.attn_implementation == "sdpa" else ""
compile_str = "compiled" if self.compile_mode is not None else "not compiled"
kernelize_str = "kernelized" if self.kernelize else "not kernelized"
sep = ", "
return sep.join([iter_str, gpu_monitor_str, dimensions_str, attn_code, compile_str, kernelize_str])
def to_dict(self) -> dict[str, Any]:
return {
"name": self.name,
"warmup_iterations": self.warmup_iterations,
"measurement_iterations": self.measurement_iterations,
"gpu_monitoring": self.gpu_monitoring,
"batch_size": self.batch_size,
"sequence_length": self.sequence_length,
"num_tokens_to_generate": self.num_tokens_to_generate,
"attn_implementation": self.attn_implementation,
"sdpa_backend": self.sdpa_backend,
"compile_mode": self.compile_mode,
"compile_options": self.compile_options | {}, # to avoid inplace modification of the original dict
"kernelize": self.kernelize,
}
@classmethod
def from_dict(cls, data: dict[str, Any], skip_validity_check: bool = False) -> "BenchmarkConfig":
return cls(
warmup_iterations=data.get("warmup_iterations", 5),
measurement_iterations=data.get("measurement_iterations", 20),
gpu_monitoring=data.get("gpu_monitoring", False),
batch_size=data.get("batch_size", 1),
sequence_length=data.get("sequence_length", 128),
num_tokens_to_generate=data.get("num_tokens_to_generate", 128),
attn_implementation=data.get("attn_implementation", "eager"),
sdpa_backend=data.get("sdpa_backend"),
compile_mode=data.get("compile_mode"),
compile_options=data.get("compile_options"),
kernelize=data.get("kernelize", False),
name=data.get("name"),
skip_validity_check=skip_validity_check,
)
def cross_generate_configs(
    attn_impl_and_sdpa_backend: list[tuple[str, str | None]],
    compiled_mode: list[str | None],
    kernelized: list[bool],
    warmup_iterations: int = 5,
    measurement_iterations: int = 20,
    batch_size: int = 1,
    sequence_length: int = 128,
    num_tokens_to_generate: int = 128,
    gpu_monitoring: bool = True,
) -> list[BenchmarkConfig]:
    """Build one config per combination of attention setup, compile mode and kernelization flag.

    Duplicated entries in each of the three lists are dropped (first occurrence wins, order is kept). The
    remaining keyword arguments are forwarded unchanged to every `BenchmarkConfig`.
    """
    # Settings shared by every generated config
    shared = {
        "warmup_iterations": warmup_iterations,
        "measurement_iterations": measurement_iterations,
        "batch_size": batch_size,
        "sequence_length": sequence_length,
        "num_tokens_to_generate": num_tokens_to_generate,
        "gpu_monitoring": gpu_monitoring,
    }
    # `dict.fromkeys` removes duplicates while preserving the order of first appearance
    unique_attn = list(dict.fromkeys(attn_impl_and_sdpa_backend))
    unique_modes = list(dict.fromkeys(compiled_mode))
    unique_kernelized = list(dict.fromkeys(kernelized))
    # Attention setup varies slowest, kernelization fastest
    return [
        BenchmarkConfig(
            attn_implementation=attn_implementation,
            sdpa_backend=sdpa_backend,
            compile_mode=mode,
            kernelize=kernelize_on,
            **shared,
        )
        for attn_implementation, sdpa_backend in unique_attn
        for mode in unique_modes
        for kernelize_on in unique_kernelized
    ]
def generate_all_configs(
    warmup_iterations: int = 5,
    measurement_iterations: int = 20,
    batch_size: int = 1,
    sequence_length: int = 128,
    num_tokens_to_generate: int = 128,
    gpu_monitoring: bool = True,
) -> list[BenchmarkConfig]:
    """Cross-generate configs over every listed attention setup, compile mode and kernelization flag."""
    # (attention implementation, SDPA backend) pairs
    attention_setups = [
        ("flash_attention_2", None),
        ("eager", None),
        ("sdpa", "math"),
        ("sdpa", "flash_attention"),
        ("flex_attention", None),
    ]
    compile_modes = [None, "default", "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs"]
    # The second entry collapses to `False` when the `kernels` import failed at module load
    kernelization_flags = [False, KERNELIZATION_AVAILABLE]
    return cross_generate_configs(
        attn_impl_and_sdpa_backend=attention_setups,
        compiled_mode=compile_modes,
        kernelized=kernelization_flags,
        warmup_iterations=warmup_iterations,
        measurement_iterations=measurement_iterations,
        batch_size=batch_size,
        sequence_length=sequence_length,
        num_tokens_to_generate=num_tokens_to_generate,
        gpu_monitoring=gpu_monitoring,
    )
def generate_main_configs(
    warmup_iterations: int = 5,
    measurement_iterations: int = 20,
    batch_size: int = 1,
    sequence_length: int = 128,
    num_tokens_to_generate: int = 128,
) -> list[BenchmarkConfig]:
    """Return the fixed list of four main scenarios, all sharing the given iteration and input settings."""
    # Settings shared by every scenario
    shared = {
        "warmup_iterations": warmup_iterations,
        "measurement_iterations": measurement_iterations,
        "batch_size": batch_size,
        "sequence_length": sequence_length,
        "num_tokens_to_generate": num_tokens_to_generate,
    }
    # (attention implementation, compile mode, GPU monitoring)  # TODO: test max-autotune instead of default
    scenarios = [
        ("flex_attention", "default", False),
        ("flex_attention", "default", True),
        ("eager", "default", True),
        ("flash_attention_2", None, True),
    ]
    return [
        BenchmarkConfig(
            attn_implementation=attn_implementation,
            compile_mode=compile_mode,
            gpu_monitoring=gpu_monitoring,
            **shared,
        )
        for attn_implementation, compile_mode, gpu_monitoring in scenarios
    ]

View File

@ -1,456 +0,0 @@
import gc
import json
import logging
import os
import pathlib
import re
import tempfile
import time
from contextlib import nullcontext
from datetime import datetime
from queue import Queue
from typing import Any
import torch
from datasets import Dataset
from huggingface_hub import HfApi
from tqdm import trange
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
CompileConfig,
GenerationConfig,
GenerationMixin,
)
from transformers.generation.streamers import BaseStreamer
from .benchmark_config import BenchmarkConfig
from .data_classes import BenchmarkMetadata, BenchmarkResult, GPURawMetrics, pretty_print_dict
from .hardware_metrics import GPUMonitor
try:
from kernels import Mode, kernelize # noqa: F401
except ImportError:
kernelize = None
Mode = None
DEFAULT_PROMPT = "\n".join([
"The French Revolution was a period of political and societal change in France that began with the Estates General of 1789 and ended with the Coup of 18 Brumaire on 9 November 1799.",
"Many of the revolution's ideas are considered fundamental principles of liberal democracy, and its values remain central to modern French political discourse.",
"It was caused by a combination of social, political, and economic factors which the existing regime proved unable to manage.",
"Financial crisis and widespread social distress led to the convocation of the Estates General in May 1789, its first meeting since 1614.",
"The representatives of the Third Estate broke away and re-constituted themselves as a National Assembly in June.",
"The Storming of the Bastille in Paris on 14 July led to a series of radical measures by the Assembly, including the abolition of feudalism, state control over the Catholic Church in France, and issuing the Declaration of the Rights of Man and of the Citizen.",
"The next three years were dominated by a struggle for political control.",
"King Louis XVI's attempted flight to Varennes in June 1791 further discredited the monarchy, and military defeats after the outbreak of the French Revolutionary Wars in April 1792 led to the insurrection of 10 August 1792.",
"As a result, the monarchy was replaced by the French First Republic in September, followed by the execution of Louis XVI himself in January 1793.",
"After another revolt in June 1793, the constitution was suspended, and political power passed from the National Convention to the Committee of Public Safety, dominated by radical Jacobins led by Maximilien Robespierre.",
"About 16,000 people were sentenced by the Revolutionary Tribunal and executed in the Reign of Terror, which ended in July 1794 with the Thermidorian Reaction.",
"Weakened by external threats and internal opposition, the Committee of Public Safety was replaced in November 1795 by the Directory.",
"Its instability ended in the coup of 18 Brumaire and the establishment of the Consulate, with Napoleon Bonaparte as First Consul.",
]) # fmt: skip
PUSH_TO_HUB_TOKEN = os.getenv("PUSH_TO_HUB_TOKEN", None)
def compact_json_numeric_arrays(data: dict) -> str:
    """Serialize `data` to indented JSON, with purely numeric arrays collapsed onto a single line.

    `data` is dumped with `indent=4` (non-serializable values go through `str`). Every array whose elements
    are all numbers is then rewritten as `[a, b, c]`. Numbers may be negative and may use exponent notation
    (e.g. `1e-05`), which is what `json.dumps` emits for small or large floats. Other arrays and empty arrays
    are left untouched.

    Args:
        data: the JSON-serializable dict to dump.

    Returns:
        The JSON text.
    """
    # One JSON number: optional sign, integer part, optional fraction, optional exponent
    number = r"-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?"
    # Match arrays that contain only numbers, whitespace, commas, and newlines
    pattern = rf"\[\s*\n\s*((?:{number}\s*,\s*)*{number})\s*\n\s*\]"

    def replace_numeric_array(match):
        # Collapse the whitespace (including newlines) between elements but keep the commas
        compact_content = re.sub(r"\s+", " ", match.group(1)).strip()
        return f"[{compact_content}]"

    return re.sub(pattern, replace_numeric_array, json.dumps(data, indent=4, default=str), flags=re.DOTALL)
def get_git_revision() -> str:
    """Return the commit hash checked out in the `.git` directory three levels above this file.

    The hash is read directly from the git metadata files, without calling the `git` executable:
      - when `HEAD` is a symbolic ref (`ref: refs/heads/...`), the hash is read from the ref file, falling back
        to `packed-refs` when the ref has no loose file;
      - otherwise (detached HEAD), `HEAD` already contains the hash and it is returned as is.

    Raises:
        FileNotFoundError: if `HEAD` is missing or the ref it points to cannot be resolved.
    """
    base_path = pathlib.Path(__file__).parent.parent.parent
    git_dir = base_path / ".git"
    with (git_dir / "HEAD").open("r") as head:
        head_content = head.readline().strip()

    # Detached HEAD: the file holds the commit hash itself, there is no ref to follow
    if not head_content.startswith("ref:"):
        return head_content

    ref = head_content.split(" ")[-1].strip()
    ref_path = git_dir / ref
    if ref_path.is_file():
        with ref_path.open("r") as git_hash:
            return git_hash.readline().strip()

    # The ref may only exist in packed form: each entry is "<hash> <ref name>"
    packed_refs = git_dir / "packed-refs"
    if packed_refs.is_file():
        with packed_refs.open("r") as packed:
            for line in packed:
                fields = line.split()
                if len(fields) == 2 and fields[1] == ref:
                    return fields[0]

    raise FileNotFoundError(f"Could not resolve git ref {ref!r} in {git_dir}")
def get_sdpa_backend(backend_name: str | None) -> torch.nn.attention.SDPBackend | None:
"""Get the SDPA backend enum from string name."""
if backend_name is None:
return None
try:
backend_map = {
"math": torch.nn.attention.SDPBackend.MATH,
"flash_attention": torch.nn.attention.SDPBackend.FLASH_ATTENTION,
"efficient_attention": torch.nn.attention.SDPBackend.EFFICIENT_ATTENTION,
"cudnn_attention": torch.nn.attention.SDPBackend.CUDNN_ATTENTION,
}
return backend_map.get(backend_name.lower())
except AttributeError:
# torch.nn.attention.SDPBackend not available in older torch versions
return None
def flush_memory():
    """Run garbage collection, reset the dynamo/inductor caches and clear the CUDA cache.

    The inductor caches are accessed defensively with `hasattr`, so a cache that is absent from the installed
    torch is simply skipped.
    """
    gc.collect()

    # Dynamo resets
    torch._dynamo.reset()
    torch._dynamo.reset_code_caches()

    inductor = torch._inductor
    if hasattr(inductor, "codecache"):
        # FX graph cache
        if hasattr(inductor.codecache, "FxGraphCache"):
            inductor.codecache.FxGraphCache.clear()
        # PyCodeCache
        if hasattr(inductor.codecache, "PyCodeCache"):
            inductor.codecache.PyCodeCache.cache_clear()
        # TritonFuture cache (for async compilation)
        if hasattr(inductor.codecache, "TritonFuture") and hasattr(
            inductor.codecache.TritonFuture, "_compile_cache"
        ):
            inductor.codecache.TritonFuture._compile_cache.clear()

    # CUDA cache and memory statistics
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()

    gc.collect()
class BenchmarkStreamer(BaseStreamer):
    """Streamer that records a timestamp every time it receives a value.

    Values passed to `put` are stored in a queue and can be read back by iterating over the streamer.
    `timestamps` holds one `time.perf_counter()` reading per `put` call, plus one for the `end` call.
    """

    def __init__(self, **kwargs) -> None:
        # Maximum time (in seconds) `__next__` waits for a value before `queue.Empty` is raised
        self.timeout = kwargs.pop("timeout", 10)
        self.timestamps = []
        self.text_queue = Queue()
        # Sentinel queued by `end` to tell the iterator that the stream is over
        self.stop_signal = None

    def put(self, value):
        """Receives tokens and logs the timestamp of the generation."""
        self.timestamps.append(time.perf_counter())
        self.text_queue.put(value)

    def end(self):
        """Log a final timestamp and queue the stop signal."""
        self.timestamps.append(time.perf_counter())
        self.text_queue.put(self.stop_signal)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next queued value; raise `StopIteration` once the stop signal is reached."""
        value = self.text_queue.get(timeout=self.timeout)
        # Identity check: `==` would call `__eq__` on the queued value, which is not a plain
        # boolean comparison for array-like values
        if value is self.stop_signal:
            raise StopIteration()
        return value
class BenchmarkRunner:
"""Main benchmark runner that coordinates benchmark execution."""
def __init__(
    self,
    logger: logging.Logger,
    output_dir: str | None = None,
    branch_name: str | None = None,
    commit_id: str | None = None,
    commit_message: str | None = None,
) -> None:
    """Store the run-wide settings and create the output directory.

    Args:
        logger: logger used for all messages of the runner.
        output_dir: directory for the results; defaults to `benchmark_results` in the parent directory of
            this file's directory.
        branch_name: stored as is.
        commit_id: commit hash; read with `get_git_revision()` when `None`.
        commit_message: stored as is.
    """
    # Those stay constant for the whole run
    self.logger = logger
    self.output_dir = (
        output_dir
        if output_dir is not None
        else os.path.join(os.path.dirname(os.path.dirname(__file__)), "benchmark_results")
    )
    self.branch_name = branch_name
    self.commit_id = commit_id if commit_id is not None else get_git_revision()
    self.commit_message = commit_message
    os.makedirs(self.output_dir, exist_ok=True)
    # Set lazily by `profile_generate`
    self.profile_dir = None

    # Attributes that are reset for each model
    self._setup_for = ""

    # Attributes that are reset for each run
    self.model: GenerationMixin | None = None
def cleanup(self) -> None:
    """Drop the reference to the current model, then flush memory."""
    # Remove the attribute first, then restore it as `None` so that it always exists afterwards
    del self.model
    self.model = None

    flush_memory()
def setup_benchmark(self, model_id: str, config: BenchmarkConfig) -> None:
    """Prepare the tokenizer, the inputs and the model for one benchmark scenario.

    The tokenizer is only reloaded when `model_id` differs from the previous call. The inputs and the model
    are rebuilt on every call, following `config`.
    """
    # Some attributes only need to be set once per model
    if self._setup_for != model_id:
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # We set the EOS token to the padding token for open-ended generation
        self.tokenizer.eos_token = self.tokenizer.pad_token
        self._setup_for = model_id

    # Inputs: the default prompt repeated `batch_size` times, truncated to `sequence_length`
    prompts = [DEFAULT_PROMPT] * config.batch_size
    encoded = self.tokenizer(
        prompts,
        return_tensors="pt",
        max_length=config.sequence_length,
        truncation=True,
        return_attention_mask=True,
    )
    self.inputs = encoded.to(config.device)
    self.inputs["use_cache"] = True

    # Generation config
    generation_config = GenerationConfig(
        do_sample=False, top_p=1.0, temperature=1.0, max_new_tokens=config.num_tokens_to_generate
    )
    # A compile config and a static cache are only set when a compile mode is requested
    if config.compile_mode is not None:
        generation_config.compile_config = CompileConfig(mode=config.compile_mode, options=config.compile_options)
        generation_config.cache_implementation = "static"

    # Model
    self.logger.debug(f"Loading model {model_id} on device {config.device}...")
    # `config.dtype` is a string such as "torch.bfloat16": resolve it to the torch attribute of that name
    torch_dtype = getattr(torch, config.dtype.removeprefix("torch."))
    self.model = AutoModelForCausalLM.from_pretrained(
        model_id,
        dtype=torch_dtype,
        attn_implementation=config.attn_implementation,
        generation_config=generation_config,
    )
    self.model = self.model.eval().to(config.device)

    # Kernelize the model if needed, which requires the optional `kernels` import to have succeeded
    kernels_available = kernelize is not None and Mode is not None
    if config.kernelize and kernels_available:
        self.model = kernelize(self.model, mode=Mode.INFERENCE)
def run_benchmark(
    self, model_id: str, config: BenchmarkConfig, num_tokens_to_profile: int = 0
) -> dict[str, Any] | None:
    """Run a single benchmark with the given model ID and config.

    Runs one single-token generation as a quick validation, then the warmup iterations, then the measured
    iterations, and finally an optional profiling pass when `num_tokens_to_profile` is positive.

    Returns:
        A dict with the keys "metadata", "measurements" and "config", or `None` when the validation run
        reports a negative latency.
    """
    # The SDPA kernel selection is only active for the "sdpa" attention implementation
    if config.attn_implementation == "sdpa":
        attention_ctx = torch.nn.attention.sdpa_kernel(get_sdpa_backend(config.sdpa_backend))
    else:
        attention_ctx = nullcontext()

    with attention_ctx, torch.no_grad():
        self.logger.info(f"Running benchmark scenario: {config.name}")

        # Quick validation: try one measurement first to see if this scenario works
        flush_memory()
        e2e_latency, _, _, _ = self.time_generate(max_new_tokens=1, gpu_monitor=None)
        if e2e_latency < 0:
            self.logger.warning(f"Skipping config {config.name}: {e2e_latency = } (no GPU monitoring)")
            return None

        # Warmup runs: results are discarded
        self.logger.info(f"Warming up with {config.warmup_iterations} iterations...")
        for _ in trange(config.warmup_iterations):
            self.time_generate(max_new_tokens=config.num_tokens_to_generate)
        self.logger.info("Warmup over.")

        # Measurement runs: a fresh GPU monitor is created for each iteration when monitoring is enabled
        result = BenchmarkResult()
        self.logger.info(f"Benchmarking with {config.measurement_iterations} iterations.")
        for _ in trange(config.measurement_iterations):
            monitor = GPUMonitor(logger=self.logger) if config.gpu_monitoring else None
            measurement = self.time_generate(
                max_new_tokens=config.num_tokens_to_generate,
                gpu_monitor=monitor,
            )
            result.accumulate(*measurement)
        self.logger.info("Benchmarking done. Cleaning up.")

        # Profile if needed
        if num_tokens_to_profile > 0:
            self.profile_generate(num_tokens_to_profile, config.name)

        metadata = BenchmarkMetadata(
            model_id=model_id,
            branch_name=self.branch_name,
            commit_id=self.commit_id,
            commit_message=self.commit_message,
        )
        return {"metadata": metadata, "measurements": result, "config": config}
def time_generate(
    self,
    max_new_tokens: int,
    gpu_monitor: GPUMonitor | None = None,
) -> tuple[float, list[float], str, GPURawMetrics | None]:
    """Time the latency of a call to model.generate() with the given (inputs) and (max_new_tokens).

    Args:
        max_new_tokens: number of tokens the generation must produce.
        gpu_monitor: optional monitor, started before the generation and stopped right after it.

    Returns:
        A tuple `(e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics)`: the wall time
        of the `generate` call, the streamer timestamps (first one excluded) relative to the start of the
        call, a string with the output shape and the decoded new tokens of the first sequence, and the
        metrics collected by the monitor (`None` without a monitor).

    Raises:
        RuntimeError: if the number of generated tokens differs from `max_new_tokens`.
    """
    # Prepare gpu monitoring if needed
    if gpu_monitor is not None:
        gpu_monitor.start()

    # Prepare streamer
    streamer = BenchmarkStreamer()

    # Generate and time
    wall_time_0 = time.perf_counter()
    try:
        outputs = self.model.generate(
            **self.inputs,
            max_new_tokens=max_new_tokens,
            streamer=streamer,
        )
    except Exception:
        # Do not leave a started monitor running when the generation fails: the caller catches the error
        # and moves on to the next scenario.
        # NOTE(review): assumes `stop_and_collect` is safe to call after a failed generation - confirm
        if gpu_monitor is not None:
            gpu_monitor.stop_and_collect()
        raise
    wall_time_1 = time.perf_counter()

    # Stop gpu monitoring if needed
    gpu_metrics = gpu_monitor.stop_and_collect() if gpu_monitor is not None else None

    # Check if generation had the right number of tokens
    input_tokens = self.inputs["input_ids"].size(-1)
    _, output_tokens = outputs.shape
    new_tokens = output_tokens - input_tokens
    if new_tokens != max_new_tokens:
        raise RuntimeError(f"Generated {new_tokens} tokens, expected {max_new_tokens}")

    # Decode outputs (new tokens of the first sequence only)
    decoded_output = self.tokenizer.decode(outputs[0, input_tokens:], skip_special_tokens=True)
    shape_and_decoded_output = f"{tuple(outputs.shape)} | {decoded_output}"

    # Compute intermediate quantities
    e2e_latency = wall_time_1 - wall_time_0
    # The first timestamp is skipped; all others are expressed relative to the start of the call
    token_generation_times = [t - wall_time_0 for t in streamer.timestamps[1:]]
    return e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics
def profile_generate(self, num_tokens_to_profile: int, config_name: str) -> None:
    """Profile a call to model.generate() and export the result as a chrome trace.

    The generation produces `num_tokens_to_profile` new tokens with the current inputs. The trace is written
    to `<profile_dir>/<config_name>.json`, where the profile directory defaults to the output directory with
    a `_profiles` suffix and is created on first use.
    """
    profiled_activities = [torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]
    with torch.profiler.profile(activities=profiled_activities, record_shapes=True) as prof:
        _ = self.model.generate(
            **self.inputs,
            max_new_tokens=num_tokens_to_profile,
        )

    # Lazily pick and create the directory receiving the traces
    if self.profile_dir is None:
        self.profile_dir = self.output_dir + "_profiles"
        os.makedirs(self.profile_dir, exist_ok=True)

    trace_path = f"{self.profile_dir}/{config_name}.json"
    prof.export_chrome_trace(trace_path)
def run_benchmarks(
    self,
    model_id: str,
    benchmark_configs: list[BenchmarkConfig],
    num_tokens_to_profile: int = 0,
    pretty_print_summary: bool = True,
) -> tuple[str, dict[str, Any]]:
    """Run multiple benchmarks for the given model ID and list of benchmark configs.

    Args:
        model_id: Model identifier forwarded to `setup_benchmark`, `run_benchmark` and `save_results`.
        benchmark_configs: Configs to run; configs whose `hash` was already run are skipped.
        num_tokens_to_profile: Forwarded to `run_benchmark`.
        pretty_print_summary: When true, print a summary of all results to stdout at the end.

    Returns:
        A tuple `(timestamp, all_results)` where `timestamp` is the run-wide timestamp string used for the
        saved file and `all_results` maps each config hash to the result returned by `run_benchmark`.
    """
    all_results = {}
    # Single timestamp for the whole run, so every intermediate save targets the same file name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    start_time = time.perf_counter()
    n_configs = len(benchmark_configs)
    for i, config in enumerate(benchmark_configs):
        # Handle SDPA backend if not determined by the config (needs to be done before skipping duplicates)
        if config.attn_implementation == "sdpa" and config.sdpa_backend is None:
            default_backend = "flash_attention"  # FIXME: torch has a _cur_sdpa_kernel_backends but it fails
            self.logger.warning(f"No SDPA backend provided, using {default_backend} instead.")
            config.sdpa_backend = default_backend
        # Skip if already run
        if config.hash in all_results:
            self.logger.info(f"Skipping duplicate config {config.name} for model {model_id} ({i + 1}/{n_configs})")
            continue
        # Otherwise, run the benchmark
        # NOTE(review): setup_benchmark runs outside the try/except below, so a setup failure aborts the whole run
        self.setup_benchmark(model_id, config)
        self.logger.info(
            f"Running benchmark of model {model_id} with scenario: {config.name} ({i + 1}/{n_configs})"
        )
        # Launch benchmark in a try/except block to avoid stopping the whole run if one benchmark fails
        try:
            results = self.run_benchmark(model_id, config, num_tokens_to_profile)
            # A None result is not recorded
            if results is not None:
                all_results[config.hash] = results
        except Exception as e:
            self.logger.error(f"Error running with scenario: {config.name}:\n{repr(e)}")
        # Cleanup model and save results
        self.cleanup()
        self.save_results(model_id, all_results, timestamp=timestamp)
    if pretty_print_summary:
        print()
        print("=" * 100)
        print(f"Finished benchmarks in {time.perf_counter() - start_time:.2f} seconds")
        print(f"Total number of benchmarks: {len(all_results)}")
        if len(all_results) > 0:
            print("First run metadata:")
            first_key = list(all_results.keys())[0]
            first_metadata = all_results[first_key]["metadata"].to_dict()
            # Flatten the nested hardware info into the metadata so it prints as a single table
            hardware_info = first_metadata.pop("hardware_info")
            pretty_print_dict(first_metadata | hardware_info, tabs=1)
        for result in all_results.values():
            print("=" * 100)
            print(f"Config: {result['config'].infer_name(compact=False)}\n")
            result["measurements"].pprint(batch_size=result["config"].batch_size, tabs=1)
        print("=" * 100)
    return (timestamp, all_results)
def save_results(self, model_name: str, results: dict, timestamp: str = "") -> str:
    """Write the benchmark results of one model to a JSON file and return its path.

    Args:
        model_name: Model identifier; any "/" is replaced by "_" to build directory and file names.
        results: Mapping from config hash to an entry holding "metadata", "measurements" and "config"
            objects, each exposing `to_dict()`.
        timestamp: Timestamp used in the file name; the current time is used when empty.

    Returns:
        The path of the written file, `<output_dir>/<model>/<model>_benchmark_<timestamp>.json`.
    """
    # One sub-directory per model
    safe_name = model_name.replace("/", "_")
    target_dir = os.path.join(self.output_dir, safe_name)
    os.makedirs(target_dir, exist_ok=True)
    # Build the timestamped file path
    if not timestamp:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filepath = os.path.join(target_dir, f"{safe_name}_benchmark_{timestamp}.json")
    # Turn every entry into plain dictionaries
    serializable = {
        cfg_hash: {
            "metadata": entry["metadata"].to_dict(),
            "measurements": entry["measurements"].to_dict(),
            "config": entry["config"].to_dict(),
        }
        for cfg_hash, entry in results.items()
    }
    # Dump to disk
    with open(filepath, "w") as f:
        f.write(compact_json_numeric_arrays(serializable))
    self.logger.info(f"Results saved to {filepath}")
    return filepath
def push_results_to_hub(self, dataset_id: str, results: dict[Any, Any], timestamp: str) -> None:
    """Upload the benchmark results as a JSON-lines file to a dataset repository on the Hub.

    Args:
        dataset_id: Repository id of the target dataset (expected to already exist).
        results: Mapping from config hash to an entry holding "config", "measurements" and "metadata"
            objects, each exposing `to_dict()`.
        timestamp: Timestamp used in the uploaded file name; the current time is used when empty.

    Raises:
        ValueError: If `PUSH_TO_HUB_TOKEN` is not set.
    """
    if PUSH_TO_HUB_TOKEN is None:
        raise ValueError(
            "PUSH_TO_HUB_TOKEN is not set, cannot push results to the Hub. When setting dataset_id, please also set the PUSH_TO_HUB_TOKEN environment variable."
        )
    n_results = len(results)
    self.logger.info(f"Pushing {n_results} results to: {dataset_id}")
    # One row per benchmark config
    rows = [
        {
            "benchmark_config_hash": cfg_hash,
            "config": entry["config"].to_dict(),
            "measurements": entry["measurements"].to_dict(),
            "metadata": entry["metadata"].to_dict(),
        }
        for cfg_hash, entry in results.items()
    ]
    ds = Dataset.from_list(rows)
    # The upload must happen while the temporary directory still exists
    with tempfile.TemporaryDirectory() as tmp:
        jsonl_path = os.path.join(tmp, "data.jsonl")
        # Rows are dumped with ensure_ascii=False, so the file must be written as UTF-8 explicitly:
        # the platform default encoding may not be able to represent decoded model outputs.
        with open(jsonl_path, "w", encoding="utf-8") as f:
            f.write("\n".join(json.dumps(ex, ensure_ascii=False) for ex in ds))
        api = HfApi()
        # NOTE: we expect the repository to already exist
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") if not timestamp else timestamp
        file_name = f"benchmark_run_{timestamp}.jsonl"
        api.upload_file(
            path_or_fileobj=jsonl_path,
            path_in_repo=file_name,
            repo_id=dataset_id,
            repo_type="dataset",
            token=PUSH_TO_HUB_TOKEN,
        )
    self.logger.info(f"Successfully uploaded results to: {dataset_id}")

View File

@ -1,167 +0,0 @@
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any
import numpy as np
from .hardware_metrics import GPURawMetrics, HardwareInfo
def compute_basic_statistics(measurements: list[float]) -> dict[str, float]:
    """Compute summary statistics of a list of measurements.

    Args:
        measurements: Values to summarize.

    Returns:
        A dict with the keys "avg", "std", "min", "med", "max" and "p95" (95th percentile).
        When `measurements` is empty every statistic is NaN instead of raising, so that a summary
        can still be printed for runs that produced no sample for one metric.
    """
    if len(measurements) == 0:
        # np.min / np.max raise on empty input, and np.mean / np.std only warn: return NaN consistently
        return {key: float("nan") for key in ("avg", "std", "min", "med", "max", "p95")}
    return {
        "avg": np.mean(measurements),
        "std": np.std(measurements),
        "min": np.min(measurements),
        "med": np.median(measurements),
        "max": np.max(measurements),
        "p95": np.percentile(measurements, 95),
    }
def add_unit_to_duration(stats: dict[str, float]) -> dict[str, str]:
    """Replace every duration (in seconds) of `stats` by a string with a human-readable unit.

    The dict is modified in place and also returned. The unit is the largest of hr, min, s, ms, us
    whose threshold the value strictly exceeds; anything else is expressed in ns.
    """

    def _with_unit(seconds: float) -> str:
        # Thresholds are strict: a value exactly equal to one falls through to the next smaller unit
        if seconds > 3600:
            return f"{(seconds / 3600):.2f}hr"
        if seconds > 60:
            return f"{(seconds / 60):.2f}min"
        if seconds > 1:
            return f"{seconds:.2f}s"
        if seconds > 1e-3:
            return f"{(seconds * 1e3):.2f}ms"
        if seconds > 1e-6:
            return f"{(seconds * 1e6):.2f}us"
        return f"{(seconds * 1e9):.2f}ns"

    # Iterate over a snapshot since values are overwritten while looping
    for name, duration in list(stats.items()):
        stats[name] = _with_unit(duration)
    return stats
def equalize_lengths_and_collate(stats: list[dict[str, str]]) -> list[str]:
    """Pad the statistics so they line up across rows, then render each row as one string.

    For each of the keys avg, std, min, med, max and p95, the values of all dicts in `stats` are
    right-padded with spaces (in place) to the width of the longest one. Each dict is then rendered
    as space-separated `key=value` pairs.
    """
    columns = ["avg", "std", "min", "med", "max", "p95"]
    for column in columns:
        # Width of the longest entry in this column
        width = max(len(row[column]) for row in stats)
        for row in stats:
            row[column] = row[column].ljust(width, " ")
    collated = []
    for row in stats:
        cells = [f"{column}={row[column]}" for column in columns]
        collated.append(" ".join(cells))
    return collated
def pretty_print_dict(data: dict[str, Any], tabs: int = 0) -> None:
    """Print `data` as aligned `key....: value` lines.

    Args:
        data: Mapping to print; keys are padded with dots to the length of the longest key plus one.
        tabs: Number of indentation units prepended to every line.

    An empty mapping prints nothing.
    """
    if not data:
        # max() below would raise ValueError on an empty mapping
        return
    max_key_length = max(len(key) for key in data.keys())
    tabs_str = " " * tabs  # same for every line, so computed once
    for key, value in data.items():
        padded_key = key.ljust(max_key_length + 1, ".")
        print(f"{tabs_str}{padded_key}: {value}")
@dataclass
class BenchmarkMetadata:
    """Metadata attached to each benchmark run: model, git information, creation time and hardware."""

    model_id: str
    timestamp: str
    branch_name: str
    commit_id: str
    commit_message: str
    hardware_info: HardwareInfo

    def __init__(self, model_id: str, commit_id: str, branch_name: str = "main", commit_message: str = "") -> None:
        """Store the given identifiers; the timestamp (UTC, ISO format) and hardware info are captured here."""
        # Values supplied by the caller
        self.model_id = model_id
        self.branch_name = branch_name
        self.commit_id = commit_id
        self.commit_message = commit_message
        # Values captured at construction time
        self.timestamp = datetime.now(timezone.utc).isoformat()
        self.hardware_info = HardwareInfo()

    def to_dict(self) -> dict[str, Any]:
        """Return the metadata as a plain dict, with the hardware info converted through its own `to_dict`."""
        plain_fields = ("model_id", "timestamp", "branch_name", "commit_id", "commit_message")
        serialized = {name: getattr(self, name) for name in plain_fields}
        serialized["hardware_info"] = self.hardware_info.to_dict()
        return serialized
class BenchmarkResult:
    """Result from a series of benchmark runs.

    Every attribute is a list with one entry per accumulated run.
    """

    def __init__(self) -> None:
        self.e2e_latency = []
        self.token_generation_times = []  # time at which each token was generated (relative to start of the generation)
        self.shape_and_decoded_outputs = []
        self.gpu_metrics = []

    def accumulate(
        self,
        e2e_latency: float,
        token_generation_times: list[float],
        shape_and_decoded_output: str,
        gpu_metrics: GPURawMetrics | None,
    ) -> None:
        """Append the measurements of one run."""
        self.e2e_latency.append(e2e_latency)
        self.token_generation_times.append(token_generation_times)
        self.shape_and_decoded_outputs.append(shape_and_decoded_output)
        self.gpu_metrics.append(gpu_metrics)

    def to_dict(self) -> dict[str, Any]:
        """Return the accumulated measurements as a dict of plain values."""
        # Save GPU metrics as None if it contains only None values
        if all(gm is None for gm in self.gpu_metrics):
            gpu_metrics = None
        else:
            # Individual runs may lack GPU metrics: keep those entries as None instead of crashing
            gpu_metrics = [gm.to_dict() if gm is not None else None for gm in self.gpu_metrics]
        return {
            "e2e_latency": self.e2e_latency,
            "token_generation_times": self.token_generation_times,
            "shape_and_decoded_outputs": self.shape_and_decoded_outputs,
            "gpu_metrics": gpu_metrics,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "BenchmarkResult":
        """Rebuild a result from the dict produced by `to_dict`."""
        n_runs = len(data["e2e_latency"])
        # Handle GPU metrics, which is saved as None if it contains only None values
        if data["gpu_metrics"] is None:
            gpu_metrics = [None for _ in range(n_runs)]
        else:
            # NOTE(review): requires GPURawMetrics to expose a `from_dict` classmethod; confirm it does
            gpu_metrics = [GPURawMetrics.from_dict(gm) if gm is not None else None for gm in data["gpu_metrics"]]
        # Create a new instance and accumulate the data
        new_instance = cls()
        for i in range(n_runs):
            new_instance.accumulate(
                e2e_latency=data["e2e_latency"][i],
                token_generation_times=data["token_generation_times"][i],
                shape_and_decoded_output=data["shape_and_decoded_outputs"][i],
                gpu_metrics=gpu_metrics[i],
            )
        return new_instance

    def get_measured_ttft(self) -> list[float]:
        """Time to first token of each run: the first recorded timestamp (runs without timestamps are skipped)."""
        return [dt[0] for dt in self.token_generation_times if len(dt) > 0]

    def get_measured_itl(self) -> list[float]:
        """Inter-token latency of each run: mean gap between recorded timestamps (needs at least two)."""
        return [(dt[-1] - dt[0]) / (len(dt) - 1) for dt in self.token_generation_times if len(dt) > 1]

    def get_throughput(self, batch_size: int) -> list[float]:
        """Throughput of each run: `batch_size` times the number of recorded timestamps, per second of e2e latency."""
        return [
            batch_size * len(dt) / e2e_latency
            for e2e_latency, dt in zip(self.e2e_latency, self.token_generation_times)
        ]

    def pprint(self, batch_size: int = 0, tabs: int = 0) -> None:
        """Print aligned statistics for latency, time to first token and inter-token latency.

        Throughput statistics are added only when `batch_size` is positive.
        """
        stats_to_collate = [
            add_unit_to_duration(compute_basic_statistics(self.e2e_latency)),
            add_unit_to_duration(compute_basic_statistics(self.get_measured_ttft())),
            add_unit_to_duration(compute_basic_statistics(self.get_measured_itl())),
        ]
        if batch_size > 0:
            throughput_stats = compute_basic_statistics(self.get_throughput(batch_size))
            stats_to_collate.append({key: f"{value:.2f}tok/s" for key, value in throughput_stats.items()})
        collated_stats = equalize_lengths_and_collate(stats_to_collate)
        dict_to_pprint = {
            "E2E Latency": collated_stats[0],
            "Time to First Token": collated_stats[1],
            "Inter-Token Latency": collated_stats[2],
        }
        if batch_size > 0:
            dict_to_pprint["Throughput"] = collated_stats[3]
        pretty_print_dict(dict_to_pprint, tabs=tabs)

View File

@ -1,171 +0,0 @@
import json
import logging
import subprocess
import sys
import threading
import time
from dataclasses import dataclass
from enum import Enum
from logging import Logger
import gpustat
import psutil
import torch
# Data class to hold the hardware information
def get_device_name_and_memory_total() -> tuple[str, float]:
    """Return the name of GPU 0 and its total memory, converted from bytes by dividing by 1024**3."""
    gib = 1024**3
    name = torch.cuda.get_device_properties(0).name
    total_memory = torch.cuda.get_device_properties(0).total_memory
    return name, total_memory / gib
class HardwareInfo:
"""A class to hold information about the hardware."""
def __init__(self) -> None:
# Retrieve GPU stats
try:
self.gpu_name, self.gpu_memory_total_gb = get_device_name_and_memory_total()
except Exception:
self.gpu_name, self.gpu_memory_total_gb = None, None
# Retrieve python, torch and CUDA version
self.python_version = f"{sys.version.split()[0]}"
self.torch_version = torch.__version__
if hasattr(torch, "cuda") and torch.cuda.is_available():
self.cuda_version = torch.version.cuda
else:
self.cuda_version = None
# Retrieve general hardware information
self.cpu_count = psutil.cpu_count()
self.memory_total_mb = int(psutil.virtual_memory().total / (1024 * 1024))
def to_dict(self) -> dict[str, None | int | float | str]:
return {
"gpu_name": self.gpu_name,
"gpu_memory_total_gb": self.gpu_memory_total_gb,
"python_version": self.python_version,
"torch_version": self.torch_version,
}
# Functions to get information about the GPU
def get_amd_gpu_stats() -> tuple[int, float]:
    """Returns the utilization (in percent) and the VRAM used (bytes divided by 1024**3) of the busiest AMD GPU.

    The values are read from `rocm-smi --json`; when several cards are reported, the one with the
    highest utilization is returned (the first one on ties).
    """
    rocm_smi_output = subprocess.check_output(["rocm-smi", "--json", "--showuse", "--showmeminfo", "VRAM"])
    gpu_stats = json.loads(rocm_smi_output.decode("utf-8"))
    # Convert to numbers before comparing: comparing the raw JSON values would order string
    # utilizations lexicographically (e.g. "9" above "10").
    per_card = [
        (float(stats["GPU use (%)"]), float(stats["VRAM Total Used Memory (B)"])) for stats in gpu_stats.values()
    ]
    utilization, memory_used_bytes = max(per_card, key=lambda card: card[0])
    return int(utilization), memory_used_bytes / 1024**3
def get_nvidia_gpu_stats() -> tuple[int, float]:
    """Returns the utilization (in percent) and the memory used (in GB) of the first NVIDIA GPU."""
    gpu_stats = gpustat.GPUStatCollection.new_query()
    gpu_stats = gpu_stats[0]
    # gpustat reports "memory.used" in MiB, so a single division by 1024 yields GB
    # (dividing by 1024**3 would treat the value as bytes).
    return int(gpu_stats["utilization.gpu"]), float(gpu_stats["memory.used"]) / 1024
class GPUStatsCollector:
    """A class to get statistics about the GPU. It serves as a wrapper that holds the GPU total memory and its name,
    which is used to call the right function to get the utilization and memory used."""

    def __init__(self) -> None:
        self.device_name, self.device_memory_total = get_device_name_and_memory_total()
        # Monkey patch the get_utilization_and_memory_used method based on the GPU type
        if "amd" in self.device_name.lower():
            self.get_utilization_and_memory_used = get_amd_gpu_stats
        elif "nvidia" in self.device_name.lower():
            self.get_utilization_and_memory_used = get_nvidia_gpu_stats
        else:
            raise RuntimeError(f"Unsupported GPU: {self.device_name}")

    def get_utilization_and_memory_used(self) -> tuple[int, float]:
        """Get the utilization and memory used of the GPU, as returned by the vendor-specific function.

        Class-level placeholder: `__init__` shadows it on the instance with the vendor-specific function.
        """
        raise NotImplementedError("This method is meant to be monkey patched during __init__")

    def get_measurements(self) -> tuple[int, float]:
        """Alias of `get_utilization_and_memory_used`, kept for backward compatibility."""
        return self.get_utilization_and_memory_used()
# Simple data classes to hold the raw GPU metrics
class GPUMonitoringStatus(Enum):
    """Status of GPU monitoring.

    The string values are what gets serialized (see `GPURawMetrics.to_dict`, which stores `.value`).
    """

    # Samples were collected
    SUCCESS = "success"
    FAILED = "failed"
    NO_GPUS_AVAILABLE = "no_gpus_available"
    # Monitoring stopped before any sample was recorded
    NO_SAMPLES_COLLECTED = "no_samples_collected"
@dataclass
class GPURawMetrics:
    """Raw values for GPU utilization and memory used."""

    utilization: list[float]  # in percent
    memory_used: list[float]  # in GB
    timestamps: list[float]  # in seconds
    timestamp_0: float  # in seconds
    monitoring_status: GPUMonitoringStatus

    def to_dict(self) -> dict[str, None | int | float | str]:
        """Return the metrics as a plain dict; the monitoring status is stored by its string value."""
        return {
            "utilization": self.utilization,
            "memory_used": self.memory_used,
            "timestamps": self.timestamps,
            "timestamp_0": self.timestamp_0,
            "monitoring_status": self.monitoring_status.value,
        }

    @classmethod
    def from_dict(cls, data: dict[str, None | int | float | str]) -> "GPURawMetrics":
        """Rebuild the metrics from the dict produced by `to_dict`.

        Raises:
            KeyError: If a field is missing from `data`.
            ValueError: If the stored monitoring status is not a valid `GPUMonitoringStatus` value.
        """
        return cls(
            utilization=data["utilization"],
            memory_used=data["memory_used"],
            timestamps=data["timestamps"],
            timestamp_0=data["timestamp_0"],
            monitoring_status=GPUMonitoringStatus(data["monitoring_status"]),
        )
# Main class, used to monitor the GPU utilization during benchmark execution
class GPUMonitor:
    """Monitor GPU utilization during benchmark execution.

    Samples are taken by a background thread between `start()` and `stop_and_collect()`.
    """

    def __init__(self, sample_interval_sec: float = 0.1, logger: Logger | None = None):
        """
        Args:
            sample_interval_sec: Time waited between two samples.
            logger: Logger to use; defaults to this module's logger.

        Raises:
            RuntimeError: If torch detects no GPU, or if the GPU type is unsupported.
        """
        self.sample_interval_sec = sample_interval_sec
        self.logger = logger if logger is not None else logging.getLogger(__name__)
        self.num_available_gpus = torch.cuda.device_count()
        if self.num_available_gpus == 0:
            raise RuntimeError("No GPUs detected by torch.cuda.device_count().")
        self.gpu_stats_getter = GPUStatsCollector()

    def start(self):
        """Start monitoring GPU metrics."""
        # A fresh (cleared) stop event and empty buffers for this monitoring session
        self.stop_event = threading.Event()
        self.gpu_utilization = []
        self.gpu_memory_used = []
        self.timestamps = []
        self.thread = threading.Thread(target=self._monitor_loop)
        self.thread.start()
        self.logger.debug("GPU monitoring started")

    def stop_and_collect(self) -> GPURawMetrics:
        """Stop monitoring and return collected metrics.

        Timestamps are returned relative to the first sample, whose absolute time is `timestamp_0`.
        When no sample was collected, empty metrics with status NO_SAMPLES_COLLECTED are returned.
        """
        self.stop_event.set()
        self.thread.join()
        if self.gpu_utilization:
            timestamp_0 = self.timestamps[0]
            metrics = GPURawMetrics(
                utilization=self.gpu_utilization,
                memory_used=self.gpu_memory_used,
                timestamps=[t - timestamp_0 for t in self.timestamps],
                timestamp_0=timestamp_0,
                monitoring_status=GPUMonitoringStatus.SUCCESS,
            )
            self.logger.debug(f"GPU monitoring completed: {len(self.gpu_utilization)} samples collected")
        else:
            # GPURawMetrics has no default values: every field must be given explicitly,
            # otherwise constructing the "no samples" result raises a TypeError.
            metrics = GPURawMetrics(
                utilization=[],
                memory_used=[],
                timestamps=[],
                timestamp_0=0.0,
                monitoring_status=GPUMonitoringStatus.NO_SAMPLES_COLLECTED,
            )
        return metrics

    def _monitor_loop(self):
        """Background monitoring loop using threading.Event for communication."""
        while not self.stop_event.is_set():
            utilization, memory_used = self.gpu_stats_getter.get_utilization_and_memory_used()
            self.gpu_utilization.append(utilization)
            self.gpu_memory_used.append(memory_used)
            self.timestamps.append(time.time())
            # wait() doubles as the sampling sleep and returns True as soon as a stop is requested
            if self.stop_event.wait(timeout=self.sample_interval_sec):
                break

View File

@ -1,7 +0,0 @@
numpy>=1.21.0
psutil>=5.8.0
gpustat>=1.0.0
torch>=2.0.0
transformers>=4.30.0
datasets>=2.10.0
huggingface_hub>=0.16.0

View File

@ -1,142 +0,0 @@
#!/usr/bin/env python3
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Top-level benchmarking script that automatically discovers and runs all benchmarks
in the ./benches directory, organizing outputs into model-specific subfolders.
"""
import argparse
import logging
import sys
import uuid
from framework.benchmark_config import BenchmarkConfig, generate_all_configs, generate_main_configs
from framework.benchmark_runner import BenchmarkRunner
if __name__ == "__main__":
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--output-dir", type=str, default=None, help="Output dir for benchmark results")
    parser.add_argument("--log-level", type=str, choices=["DEBUG", "INFO", "WARNING", "ERROR"], default="INFO")
    parser.add_argument("--model-id", type=str, help="Specific model ID to benchmark (if supported by benchmarks)")
    parser.add_argument("--warmup", "-w", type=int, default=3, help="Number of warmup iterations")
    parser.add_argument("--iterations", "-i", type=int, default=10, help="Number of measurement iterations")
    parser.add_argument("--batch-size", "-b", type=int, nargs="+", help="Batch size")
    parser.add_argument("--sequence-length", "-s", type=int, nargs="+", help="Sequence length")
    parser.add_argument("--num-tokens-to-generate", "-n", type=int, nargs="+", help="Number of tokens to generate")
    parser.add_argument("--cross-generate", action="store_true", help="Cross-generate all combinations of configs")
    parser.add_argument("--num-tokens-to-profile", "-p", type=int, default=0, help="Number of tokens to profile")
    parser.add_argument("--branch-name", type=str, help="Git branch name")
    parser.add_argument("--commit-id", type=str, help="Git commit ID (if not provided, will auto-detect from git)")
    parser.add_argument("--commit-message", type=str, help="Git commit message")
    parser.add_argument(
        "--no-gpu-monitoring", action="store_true", help="Disables GPU monitoring during benchmark runs"
    )
    parser.add_argument(
        "--push-result-to-dataset",
        type=str,
        default=None,
        help="Name of the dataset to push results to. If not provided, results are not pushed to the Hub.",
    )
    args = parser.parse_args()
    # Setup logging
    benchmark_run_uuid = str(uuid.uuid4())[:8]
    numeric_level = getattr(logging, args.log_level.upper())
    handlers = [logging.StreamHandler(sys.stdout)]
    logging.basicConfig(
        level=numeric_level, format="[%(levelname)s - %(asctime)s] %(name)s: %(message)s", handlers=handlers
    )
    logger = logging.getLogger("benchmark_v2")
    logger.info("Starting benchmark discovery and execution")
    logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
    logger.info(f"Output directory: {args.output_dir}")
    # Options declared with nargs="+" are None when omitted: normalize them to empty lists so the
    # check below reports the missing argument instead of failing on len(None)
    batch_sizes = args.batch_size or []
    sequence_lengths = args.sequence_length or []
    nums_tokens_to_generate = args.num_tokens_to_generate or []
    n_combinations = len(batch_sizes) * len(sequence_lengths) * len(nums_tokens_to_generate)
    # Error out if one of the arguments is not provided
    if n_combinations == 0:
        raise ValueError(
            "All of the arguments --batch-size, --sequence-length, and --num-tokens-to-generate are required"
        )
    # If there is only one (batch_size, sequence_length, num_tokens_to_generate), we benchmark across configs
    elif n_combinations == 1:
        if args.cross_generate:
            benchmark_configs = generate_all_configs(
                warmup_iterations=args.warmup,
                measurement_iterations=args.iterations,
                batch_size=batch_sizes[0],
                sequence_length=sequence_lengths[0],
                num_tokens_to_generate=nums_tokens_to_generate[0],
                gpu_monitoring=not args.no_gpu_monitoring,
            )
        else:
            benchmark_configs = generate_main_configs(
                warmup_iterations=args.warmup,
                measurement_iterations=args.iterations,
                batch_size=batch_sizes[0],
                sequence_length=sequence_lengths[0],
                num_tokens_to_generate=nums_tokens_to_generate[0],
            )
    # Otherwise, we benchmark across all combinations of dimensions
    else:
        # The first main config serves as a template; only the three dimensions are overridden below
        main_config = generate_main_configs(
            warmup_iterations=args.warmup,
            measurement_iterations=args.iterations,
            batch_size=batch_sizes[0],
            sequence_length=sequence_lengths[0],
            num_tokens_to_generate=nums_tokens_to_generate[0],
        )[0]
        benchmark_configs = []
        for num_tokens_to_generate in nums_tokens_to_generate:
            for sequence_length in sequence_lengths:
                for batch_size in batch_sizes:
                    cfg_dict = main_config.to_dict()
                    cfg_dict["batch_size"] = batch_size
                    cfg_dict["sequence_length"] = sequence_length
                    cfg_dict["num_tokens_to_generate"] = num_tokens_to_generate
                    # The template's name is dropped before rebuilding the config from the dict
                    cfg_dict.pop("name")
                    benchmark_configs.append(BenchmarkConfig.from_dict(cfg_dict))
    runner = BenchmarkRunner(
        logger,
        args.output_dir,
        args.branch_name,
        args.commit_id,
        args.commit_message,
    )
    timestamp, results = runner.run_benchmarks(
        args.model_id,
        benchmark_configs,
        args.num_tokens_to_profile,
        pretty_print_summary=True,
    )
    # Push to the Hub only when a dataset was requested and at least one benchmark succeeded
    dataset_id = args.push_result_to_dataset
    if dataset_id is not None and len(results) > 0:
        runner.push_results_to_hub(
            dataset_id,
            results,
            timestamp,
        )

View File

@ -16,7 +16,6 @@
# by pytest before any tests are run
import doctest
import os
import sys
import warnings
from os.path import abspath, dirname, join
@ -24,18 +23,12 @@ from os.path import abspath, dirname, join
import _pytest
import pytest
from transformers.testing_utils import (
HfDoctestModule,
HfDocTestParser,
is_torch_available,
patch_testing_methods_to_collect_info,
patch_torch_compile_force_graph,
)
from transformers.testing_utils import HfDoctestModule, HfDocTestParser
NOT_DEVICE_TESTS = {
"test_tokenization",
"test_tokenization_mistral_common",
"test_processor",
"test_processing",
"test_beam_constraints",
"test_configuration_utils",
@ -53,21 +46,32 @@ NOT_DEVICE_TESTS = {
"test_keep_in_fp32_modules",
"test_gradient_checkpointing_backward_compatibility",
"test_gradient_checkpointing_enable_disable",
"test_save_load_fast_init_from_base",
"test_fast_init_context_manager",
"test_fast_init_tied_embeddings",
"test_save_load_fast_init_to_base",
"test_torch_save_load",
"test_initialization",
"test_forward_signature",
"test_model_get_set_embeddings",
"test_model_main_input_name",
"test_correct_missing_keys",
"test_tie_model_weights",
"test_can_use_safetensors",
"test_load_save_without_tied_weights",
"test_tied_weights_keys",
"test_model_weights_reload_no_missing_tied_weights",
"test_can_load_ignoring_mismatched_shapes",
"test_pt_tf_model_equivalence",
"test_mismatched_shapes_have_properly_initialized_weights",
"test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist",
"test_model_is_small",
"test_tf_from_pt_safetensors",
"test_flax_from_pt_safetensors",
"ModelTest::test_pipeline_", # None of the pipeline tests from PipelineTesterMixin (of which XxxModelTest inherits from) are running on device
"ModelTester::test_pipeline_",
"/repo_utils/",
"/utils/",
"/agents/",
}
# allow having multiple repository checkouts and not needing to remember to rerun
@ -81,14 +85,17 @@ warnings.simplefilter(action="ignore", category=FutureWarning)
def pytest_configure(config):
config.addinivalue_line(
"markers", "is_pt_tf_cross_test: mark test to run only when PT and TF interactions are tested"
)
config.addinivalue_line(
"markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested"
)
config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
config.addinivalue_line("markers", "agent_tests: mark the agent tests that are run on their specific schedule")
config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu")
config.addinivalue_line("markers", "torch_compile_test: mark test which tests torch compile functionality")
config.addinivalue_line("markers", "torch_export_test: mark test which tests torch export functionality")
os.environ["DISABLE_SAFETENSORS_CONVERSION"] = "true"
def pytest_collection_modifyitems(items):
@ -133,18 +140,3 @@ class CustomOutputChecker(OutputChecker):
doctest.OutputChecker = CustomOutputChecker
_pytest.doctest.DoctestModule = HfDoctestModule
doctest.DocTestParser = HfDocTestParser
if is_torch_available():
import torch
# The flag below controls whether to allow TF32 on cuDNN. This flag defaults to True.
# We set it to `False` for CI. See https://github.com/pytorch/pytorch/issues/157274#issuecomment-3090791615
torch.backends.cudnn.allow_tf32 = False
# patch `torch.compile`: if `TORCH_COMPILE_FORCE_FULLGRAPH=1` (or values considered as true, e.g. yes, y, etc.),
# the patched version will always run with `fullgraph=True`.
patch_torch_compile_force_graph()
if os.environ.get("PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS", "").lower() in ("yes", "true", "on", "y", "1"):
patch_testing_methods_to_collect_info()

View File

@ -2,7 +2,7 @@
In this folder you will find various docker files, and some subfolders.
- dockerfiles (ex: `consistency.dockerfile`) present under `~/docker` are used for our "fast" CIs. You should be able to use them for tasks that only need CPU. For example `torch-light` is a very lightweight container (703MiB).
- subfolders contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` includes additional dependencies to allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs)
- subfolders contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` includes additional dependencies to allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs)
Note that in both cases, you need to run `uv pip install -e .`, which should take around 5 seconds. We do it outside the dockerfile for the needs of our CI: we check out a new branch each time, and the `transformers` code is thus updated.

View File

@ -4,11 +4,13 @@ USER root
ARG REF=main
RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
ENV UV_PYTHON=/usr/local/bin/python
RUN pip install uv && uv pip install --no-cache-dir -U pip setuptools GitPython
RUN uv pip install --no-cache-dir --upgrade 'torch<2.9' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
# tensorflow pin matching setup.py
RUN uv pip install --no-cache-dir pypi-kenlm
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[quality,testing,torch-speech,vision]"
RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]"
RUN git lfs install
RUN uv pip uninstall transformers
RUN pip uninstall -y transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean

View File

@ -1,10 +1,9 @@
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler git-lfs curl
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN wget https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz
RUN tar xvf jumanpp-2.0.0-rc3.tar.xz
@ -15,21 +14,13 @@ RUN mv catch.hpp ../libs/
RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
RUN make install -j 10
WORKDIR /
RUN uv pip install --no-cache --upgrade 'torch<2.9' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,spacy,ftfy,rjieba]" unidic unidic-lite
RUN uv pip install --no-cache-dir "transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
# spacy is not used so not tested. Causes to failures. TODO fix later
RUN uv run python -m unidic download
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
RUN uv pip uninstall transformers
RUN python3 -m unidic download
RUN pip uninstall -y transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
RUN apt remove -y g++ cmake xz-utils libprotobuf-dev protobuf-compiler

View File

@ -0,0 +1,12 @@
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git
RUN apt-get install -y g++ cmake
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv
RUN uv pip install --no-cache-dir -U pip setuptools albumentations seqeval
RUN pip install --upgrade --no-cache-dir "transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
RUN uv pip install --no-cache-dir "protobuf==3.20.3"
RUN pip uninstall -y transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@ -1,19 +1,11 @@
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' 'torchcodec<0.8' --index-url https://download.pytorch.org/whl/cpu
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
RUN uv pip uninstall transformers
RUN uv pip install --no-cache-dir librosa "transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
RUN pip uninstall -y transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@ -2,23 +2,16 @@ FROM python:3.10-slim
# NOTE(review): rendered diff hunk (`-2,23 +2,16`) with no +/- markers; instructions from both
# compared revisions appear back to back, and the FROM line is only visible in the hunk header.
# Stop Python from writing .pyc files.
ENV PYTHONDONTWRITEBYTECODE=1
# Git ref of transformers, used by the git+https install and the raw.githubusercontent.com fetch below.
ARG REF=main
USER root
# System packages. The first variant adds git-lfs and curl; the second adds libgl1-mesa-glx instead.
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1 g++ tesseract-ocr git-lfs curl
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
# Interpreter path handed to uv.
ENV UV_PYTHON=/usr/local/bin/python
# Bootstrap uv, then upgrade pip and setuptools through it.
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
# Torch stack from the PyTorch CPU wheel index; this variant caps torch below 2.9.
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
# Other variant of the bootstrap step: additionally runs `uv venv`.
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
# Other variant of the torch step: plain pip, no version cap.
RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
# timm and accelerate are installed without their dependencies (--no-deps).
RUN uv pip install --no-cache-dir --no-deps timm accelerate
# OCR, edit-distance, OpenCV and NLTK packages; one variant goes through uv, the other through pip with an eager upgrade strategy.
RUN uv pip install -U --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
# transformers at ${REF} with the testing and vision extras plus scikit-learn, torch-stft, nose and dataset (uv variant, then pip variant).
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose' 'dataset'
RUN pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose' 'dataset'
# RUN git clone https://github.com/facebookresearch/detectron2.git
# RUN python3 -m pip install --no-cache-dir -e detectron2
# detectron2 pinned to a fixed commit; this variant builds without build isolation.
RUN uv pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3' --no-build-isolation
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
# Remove transformers itself again; the packages pulled in alongside it stay installed.
RUN uv pip uninstall transformers
# Other variant of the detectron2 install: same pinned commit, through pip with default build isolation.
RUN pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3'
# Other variant of the removal step, using pip.
RUN pip uninstall -y transformers
# Drop the apt cache and package lists.
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@ -0,0 +1,10 @@
# Image that preinstalls the dependencies of transformers' Flax-related extras
# (flax, testing, sentencepiece, flax-speech, vision) on top of python:3.10-slim.
FROM python:3.10-slim
# Stop Python from writing .pyc files.
ENV PYTHONDONTWRITEBYTECODE=1
# Git ref of transformers to install from; override with `--build-arg REF=<ref>`.
ARG REF=main
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake
# Interpreter path handed to uv.
ENV UV_PYTHON=/usr/local/bin/python
# Bootstrap uv, create a virtual environment, and upgrade pip/setuptools through uv.
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
# scipy is capped below 1.13 alongside the transformers extras.
RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]"
# Remove transformers itself again; the packages pulled in alongside it stay installed.
RUN pip uninstall -y transformers
# `autoremove` needs -y: a build has no interactive stdin, so without it apt-get aborts at the
# confirmation prompt (failing the build) whenever there is at least one package to remove.
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove -y && apt-get autoclean

View File

@ -0,0 +1,10 @@
# Image that preinstalls transformers together with its TensorFlow-related extras
# (sklearn, tf-cpu, testing, sentencepiece, tf-speech, vision) on top of python:3.10-slim.
FROM python:3.10-slim

# Stop Python from writing .pyc files.
ENV PYTHONDONTWRITEBYTECODE=1
# Git ref of transformers to install from; override with `--build-arg REF=<ref>`.
ARG REF=main
USER root

# System packages, one per line.
RUN apt-get update \
 && apt-get install -y \
      libsndfile1-dev \
      espeak-ng \
      time \
      git \
      cmake \
      g++

# Interpreter path handed to uv.
ENV UV_PYTHON=/usr/local/bin/python

# Bootstrap uv, create a virtual environment, and upgrade pip/setuptools through uv.
RUN pip --no-cache-dir install uv \
 && uv venv \
 && uv pip install --no-cache-dir -U pip setuptools

# transformers at ${REF} with its TensorFlow-related extras, installed through pip.
RUN pip install --no-cache-dir \
      "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]"

# protobuf is pinned to 3.20.3; tensorflow_probability is left unpinned.
RUN uv pip install --no-cache-dir \
      "protobuf==3.20.3" \
      tensorflow_probability

# Drop the apt cache and package lists.
RUN apt-get clean \
 && rm -rf /var/lib/apt/lists/*

View File

@ -2,17 +2,10 @@ FROM python:3.10-slim
# NOTE(review): rendered diff hunk (`-2,17 +2,10`) with no +/- markers; instructions from both
# compared revisions appear back to back, and the FROM line is only visible in the hunk header.
# Stop Python from writing .pyc files.
ENV PYTHONDONTWRITEBYTECODE=1
# Git ref of transformers, used by the git+https install and the raw.githubusercontent.com fetch below.
ARG REF=main
USER root
# System packages. The first variant additionally installs ffmpeg and curl.
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg curl
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git
# Interpreter path handed to uv.
ENV UV_PYTHON=/usr/local/bin/python
# Bootstrap uv, then upgrade pip and setuptools through it.
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
# Torch stack from the PyTorch CPU wheel index; this variant caps torch below 2.9 and adds torchcodec below 0.8.
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' 'torchcodec<0.8' --index-url https://download.pytorch.org/whl/cpu
# Other variant of the bootstrap step: additionally runs `uv venv`.
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
# Other variant of the torch step: plain pip, no version caps, no torchcodec.
RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
# timm and accelerate are installed without their dependencies (--no-deps).
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
# transformers at ${REF} with the sklearn, sentencepiece, vision and testing extras, plus librosa.
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
# Remove transformers itself again (uv variant, then pip variant); the packages pulled in alongside it stay installed.
RUN uv pip uninstall transformers
RUN pip uninstall -y transformers

View File

@ -4,6 +4,6 @@ ARG REF=main
# NOTE(review): rendered diff hunk (`-4,6 +4,6`) with no +/- markers; the two `pip install uv`
# lines presumably come from different revisions, and the first three lines of the file are not shown.
USER root
RUN apt-get update && apt-get install -y time git
# Interpreter path handed to uv.
ENV UV_PYTHON=/usr/local/bin/python
# Install uv; the second variant additionally creates a virtual environment with `uv venv`.
RUN pip install uv
RUN pip install uv && uv venv
# pip, setuptools, GitPython, urllib3 and transformers at ${REF} with only the `ruff` extra.
RUN uv pip install --no-cache-dir -U pip setuptools GitPython "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ruff]" urllib3
# jq and curl are installed from the package lists fetched by the earlier `apt-get update`, then the apt cache and lists are dropped.
RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/*

Some files were not shown because too many files have changed in this diff Show More