mirror of
https://github.com/huggingface/transformers.git
synced 2025-10-24 03:24:37 +08:00
Compare commits
2 Commits
add-block-
...
check_env_
Author | SHA1 | Date | |
---|---|---|---|
128322bad4 | |||
2a281d11d2 |
@ -7,18 +7,6 @@ parameters:
|
||||
nightly:
|
||||
type: boolean
|
||||
default: false
|
||||
GHA_Actor:
|
||||
type: string
|
||||
default: ""
|
||||
GHA_Action:
|
||||
type: string
|
||||
default: ""
|
||||
GHA_Event:
|
||||
type: string
|
||||
default: ""
|
||||
GHA_Meta:
|
||||
type: string
|
||||
default: ""
|
||||
|
||||
jobs:
|
||||
# Ensure running with CircleCI/huggingface
|
||||
@ -43,6 +31,14 @@ jobs:
|
||||
parallelism: 1
|
||||
steps:
|
||||
- checkout
|
||||
- run: if [[ "$CIRCLE_PULL_REQUEST" == "" && "$CIRCLE_BRANCH" != "main" && "$CIRCLE_BRANCH" != *-release ]]; then echo "Not a PR, not the main branch and not a release branch, skip test!"; circleci-agent step halt; fi
|
||||
- run: 'curl -L -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pulls/${CIRCLE_PULL_REQUEST##*/} >> github.txt'
|
||||
- run: cat github.txt
|
||||
- run: (python3 -c 'import json; from datetime import datetime; fp = open("github.txt"); data = json.load(fp); fp.close(); f = "%Y-%m-%dT%H:%M:%SZ"; created = datetime.strptime(data["created_at"], f); updated = datetime.strptime(data["updated_at"], f); s = (updated - created).total_seconds(); print(int(s))' || true) > elapsed.txt
|
||||
- run: if [ "$(cat elapsed.txt)" == "" ]; then echo 60 > elapsed.txt; fi
|
||||
- run: cat elapsed.txt
|
||||
- run: if [ "$(cat elapsed.txt)" -lt "30" ]; then echo "PR is just opened, wait some actions from GitHub"; sleep 30; fi
|
||||
- run: 'if grep -q "\"draft\": true," github.txt; then echo "draft mode, skip test!"; circleci-agent step halt; fi'
|
||||
- run: uv pip install -U -e .
|
||||
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
|
||||
- run: mkdir -p test_preparation
|
||||
@ -112,6 +108,8 @@ jobs:
|
||||
|
||||
- run:
|
||||
name: "Retrieve Artifact Paths"
|
||||
env:
|
||||
CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
|
||||
command: |
|
||||
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
|
||||
job_number=${CIRCLE_BUILD_NUM}
|
||||
@ -184,7 +182,6 @@ jobs:
|
||||
- run: python utils/check_dummies.py
|
||||
- run: python utils/check_repo.py
|
||||
- run: python utils/check_inits.py
|
||||
- run: python utils/check_pipeline_typing.py
|
||||
- run: python utils/check_config_docstrings.py
|
||||
- run: python utils/check_config_attributes.py
|
||||
- run: python utils/check_doctest_list.py
|
||||
|
@ -28,8 +28,6 @@ COMMON_ENV_VARIABLES = {
|
||||
"TRANSFORMERS_IS_CI": True,
|
||||
"PYTEST_TIMEOUT": 120,
|
||||
"RUN_PIPELINE_TESTS": False,
|
||||
# will be adjust in `CircleCIJob.to_dict`.
|
||||
"RUN_FLAKY": True,
|
||||
}
|
||||
# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
|
||||
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None}
|
||||
@ -128,8 +126,6 @@ class CircleCIJob:
|
||||
|
||||
def to_dict(self):
|
||||
env = COMMON_ENV_VARIABLES.copy()
|
||||
# Do not run tests decorated by @is_flaky on pull requests
|
||||
env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
|
||||
env.update(self.additional_env)
|
||||
|
||||
job = {
|
||||
@ -155,6 +151,7 @@ class CircleCIJob:
|
||||
"checkout",
|
||||
{"attach_workspace": {"at": "test_preparation"}},
|
||||
{"run": "apt-get update && apt-get install -y curl"},
|
||||
{"run": "echo $TRANSFORMERS_IS_CI"},
|
||||
{"run": " && ".join(self.install_steps)},
|
||||
{"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"},
|
||||
{"run": {
|
||||
@ -175,7 +172,6 @@ class CircleCIJob:
|
||||
"command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
|
||||
}
|
||||
},
|
||||
{"run": {"name": "fetch hub objects before pytest", "command": "python3 utils/fetch_hub_objects_for_ci.py"}},
|
||||
{"run": {
|
||||
"name": "Run tests",
|
||||
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
|
||||
@ -213,7 +209,7 @@ generate_job = CircleCIJob(
|
||||
docker_image=[{"image": "huggingface/transformers-torch-light"}],
|
||||
# networkx==3.3 (after #36957) cause some issues
|
||||
# TODO: remove this once it works directly
|
||||
install_steps=["uv venv && uv pip install ."],
|
||||
install_steps=["uv venv && uv pip install . && uv pip install networkx==3.2.1"],
|
||||
marker="generate",
|
||||
parallelism=6,
|
||||
)
|
||||
@ -230,6 +226,22 @@ processor_job = CircleCIJob(
|
||||
parallelism=8,
|
||||
)
|
||||
|
||||
tf_job = CircleCIJob(
|
||||
"tf",
|
||||
docker_image=[{"image":"huggingface/transformers-tf-light"}],
|
||||
parallelism=6,
|
||||
)
|
||||
|
||||
|
||||
flax_job = CircleCIJob(
|
||||
"flax",
|
||||
docker_image=[{"image":"huggingface/transformers-jax-light"}],
|
||||
parallelism=6,
|
||||
pytest_num_workers=16,
|
||||
resource_class="2xlarge",
|
||||
)
|
||||
|
||||
|
||||
pipelines_torch_job = CircleCIJob(
|
||||
"pipelines_torch",
|
||||
additional_env={"RUN_PIPELINE_TESTS": True},
|
||||
@ -238,6 +250,16 @@ pipelines_torch_job = CircleCIJob(
|
||||
parallelism=4,
|
||||
)
|
||||
|
||||
|
||||
pipelines_tf_job = CircleCIJob(
|
||||
"pipelines_tf",
|
||||
additional_env={"RUN_PIPELINE_TESTS": True},
|
||||
docker_image=[{"image":"huggingface/transformers-tf-light"}],
|
||||
marker="is_pipeline_test",
|
||||
parallelism=4,
|
||||
)
|
||||
|
||||
|
||||
custom_tokenizers_job = CircleCIJob(
|
||||
"custom_tokenizers",
|
||||
additional_env={"RUN_CUSTOM_TOKENIZERS": True},
|
||||
@ -254,6 +276,15 @@ examples_torch_job = CircleCIJob(
|
||||
pytest_num_workers=4,
|
||||
)
|
||||
|
||||
|
||||
examples_tensorflow_job = CircleCIJob(
|
||||
"examples_tensorflow",
|
||||
additional_env={"OMP_NUM_THREADS": 8},
|
||||
docker_image=[{"image":"huggingface/transformers-examples-tf"}],
|
||||
pytest_num_workers=2,
|
||||
)
|
||||
|
||||
|
||||
hub_job = CircleCIJob(
|
||||
"hub",
|
||||
additional_env={"HUGGINGFACE_CO_STAGING": True},
|
||||
@ -274,7 +305,7 @@ onnx_job = CircleCIJob(
|
||||
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
|
||||
install_steps=[
|
||||
"uv venv",
|
||||
"uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]",
|
||||
"uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]",
|
||||
],
|
||||
pytest_options={"k onnx": None},
|
||||
pytest_num_workers=1,
|
||||
@ -303,7 +334,7 @@ non_model_job = CircleCIJob(
|
||||
docker_image=[{"image": "huggingface/transformers-torch-light"}],
|
||||
# networkx==3.3 (after #36957) cause some issues
|
||||
# TODO: remove this once it works directly
|
||||
install_steps=["uv venv && uv pip install .[serving]"],
|
||||
install_steps=["uv venv && uv pip install . && uv pip install networkx==3.2.1"],
|
||||
marker="not generate",
|
||||
parallelism=6,
|
||||
)
|
||||
@ -333,7 +364,7 @@ doc_test_job = CircleCIJob(
|
||||
pytest_num_workers=1,
|
||||
)
|
||||
|
||||
REGULAR_TESTS = [torch_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
|
||||
REGULAR_TESTS = [torch_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
|
||||
EXAMPLES_TESTS = [examples_torch_job]
|
||||
PIPELINE_TESTS = [pipelines_torch_job]
|
||||
REPO_UTIL_TESTS = [repo_utils_job]
|
||||
@ -362,12 +393,7 @@ def create_circleci_config(folder=None):
|
||||
"parameters": {
|
||||
# Only used to accept the parameters from the trigger
|
||||
"nightly": {"type": "boolean", "default": False},
|
||||
# Only used to accept the parameters from GitHub Actions trigger
|
||||
"GHA_Actor": {"type": "string", "default": ""},
|
||||
"GHA_Action": {"type": "string", "default": ""},
|
||||
"GHA_Event": {"type": "string", "default": ""},
|
||||
"GHA_Meta": {"type": "string", "default": ""},
|
||||
"tests_to_run": {"type": "string", "default": ""},
|
||||
"tests_to_run": {"type": "string", "default": ''},
|
||||
**{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
|
||||
**{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
|
||||
},
|
||||
|
12
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
12
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
@ -16,7 +16,7 @@ body:
|
||||
id: system-info
|
||||
attributes:
|
||||
label: System Info
|
||||
description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
|
||||
description: Please share your system info with us. You can run the command `transformers-cli env` and copy-paste its output below.
|
||||
placeholder: transformers version, platform, python version, ...
|
||||
validations:
|
||||
required: true
|
||||
@ -48,20 +48,14 @@ body:
|
||||
- pipelines: @Rocketknight1
|
||||
- tensorflow: @gante and @Rocketknight1
|
||||
- tokenizers: @ArthurZucker and @itazap
|
||||
- trainer: @zach-huggingface @SunMarc
|
||||
- trainer: @muellerzr @SunMarc
|
||||
|
||||
Integrations:
|
||||
|
||||
- deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface
|
||||
- deepspeed: HF Trainer/Accelerate: @muellerzr
|
||||
- ray/raytune: @richardliaw, @amogkam
|
||||
- Big Model Inference: @SunMarc
|
||||
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
|
||||
|
||||
Devices/Backends:
|
||||
|
||||
- AMD ROCm: @ivarflakstad
|
||||
- Intel XPU: @IlyasMoutawwakil
|
||||
- Ascend NPU: @ivarflakstad
|
||||
|
||||
Documentation: @stevhliu
|
||||
|
||||
|
2
.github/ISSUE_TEMPLATE/i18n.md
vendored
2
.github/ISSUE_TEMPLATE/i18n.md
vendored
@ -23,7 +23,7 @@ Some notes:
|
||||
* Please translate in a gender-neutral way.
|
||||
* Add your translations to the folder called `<languageCode>` inside the [source folder](https://github.com/huggingface/transformers/tree/main/docs/source).
|
||||
* Register your translation in `<languageCode>/_toctree.yml`; please follow the order of the [English version](https://github.com/huggingface/transformers/blob/main/docs/source/en/_toctree.yml).
|
||||
* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @stevhliu for review.
|
||||
* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @stevhliu and @MKhalusova for review.
|
||||
* 🙋 If you'd like others to help you with the translation, you can also post in the 🤗 [forums](https://discuss.huggingface.co/).
|
||||
|
||||
## Get Started section
|
||||
|
2
.github/ISSUE_TEMPLATE/migration.yml
vendored
2
.github/ISSUE_TEMPLATE/migration.yml
vendored
@ -6,7 +6,7 @@ body:
|
||||
id: system-info
|
||||
attributes:
|
||||
label: System Info
|
||||
description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
|
||||
description: Please share your system info with us. You can run the command `transformers-cli env` and copy-paste its output below.
|
||||
render: shell
|
||||
placeholder: transformers version, platform, python version, ...
|
||||
validations:
|
||||
|
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
@ -51,12 +51,12 @@ Library:
|
||||
- pipelines: @Rocketknight1
|
||||
- tensorflow: @gante and @Rocketknight1
|
||||
- tokenizers: @ArthurZucker
|
||||
- trainer: @zach-huggingface, @SunMarc and @qgallouedec
|
||||
- trainer: @muellerzr and @SunMarc
|
||||
- chat templates: @Rocketknight1
|
||||
|
||||
Integrations:
|
||||
|
||||
- deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface
|
||||
- deepspeed: HF Trainer/Accelerate: @muellerzr
|
||||
- ray/raytune: @richardliaw, @amogkam
|
||||
- Big Model Inference: @SunMarc
|
||||
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
|
||||
|
18
.github/scripts/assign_reviewers.py
vendored
18
.github/scripts/assign_reviewers.py
vendored
@ -54,21 +54,6 @@ def get_file_owners(file_path, codeowners_lines):
|
||||
return owners # Remember, can still be empty!
|
||||
return [] # Should never happen, but just in case
|
||||
|
||||
def pr_author_is_in_hf(pr_author, codeowners_lines):
|
||||
# Check if the PR author is in the codeowners file
|
||||
for line in codeowners_lines:
|
||||
line = line.split('#')[0].strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
# Split into pattern and owners
|
||||
parts = line.split()
|
||||
owners = [owner.removeprefix("@") for owner in parts[1:]]
|
||||
|
||||
if pr_author in owners:
|
||||
return True
|
||||
return False
|
||||
|
||||
def main():
|
||||
script_dir = Path(__file__).parent.absolute()
|
||||
with open(script_dir / "codeowners_for_review_action") as f:
|
||||
@ -83,9 +68,6 @@ def main():
|
||||
pr_number = event['pull_request']['number']
|
||||
pr = repo.get_pull(pr_number)
|
||||
pr_author = pr.user.login
|
||||
if pr_author_is_in_hf(pr_author, codeowners_lines):
|
||||
print(f"PR author {pr_author} is in codeowners, skipping review request.")
|
||||
return
|
||||
|
||||
existing_reviews = list(pr.get_reviews())
|
||||
if existing_reviews:
|
||||
|
6
.github/scripts/codeowners_for_review_action
vendored
6
.github/scripts/codeowners_for_review_action
vendored
@ -14,7 +14,7 @@ docs/ @stevhliu
|
||||
# Owners of subsections of the library
|
||||
/src/transformers/generation/ @gante
|
||||
/src/transformers/pipeline/ @Rocketknight1 @yonigozlan
|
||||
/src/transformers/integrations/ @SunMarc @MekkCyber @zach-huggingface
|
||||
/src/transformers/integrations/ @SunMarc @MekkCyber @muellerzr
|
||||
/src/transformers/quantizers/ @SunMarc @MekkCyber
|
||||
tests/ @ydshieh
|
||||
tests/generation/ @gante
|
||||
@ -27,8 +27,8 @@ tests/generation/ @gante
|
||||
# Specific files come after the sections/globs, so they take priority
|
||||
/.circleci/config.yml @ArthurZucker @ydshieh
|
||||
/utils/tests_fetcher.py @ydshieh
|
||||
trainer.py @zach-huggingface @SunMarc
|
||||
trainer_utils.py @zach-huggingface @SunMarc
|
||||
trainer.py @muellerzr @SunMarc
|
||||
trainer_utils.py @muellerzr @SunMarc
|
||||
/utils/modular_model_converter.py @Cyrilvallez @ArthurZucker
|
||||
|
||||
# Owners of individual models are specific / high priority, and so they come last
|
||||
|
2
.github/workflows/add-model-like.yml
vendored
2
.github/workflows/add-model-like.yml
vendored
@ -54,7 +54,7 @@ jobs:
|
||||
- name: Create model files
|
||||
run: |
|
||||
. ~/venv/bin/activate
|
||||
transformers add-new-model-like --config_file tests/fixtures/add_distilbert_like_config.json --path_to_repo .
|
||||
transformers-cli add-new-model-like --config_file tests/fixtures/add_distilbert_like_config.json --path_to_repo .
|
||||
make style
|
||||
make fix-copies
|
||||
|
||||
|
2
.github/workflows/benchmark.yml
vendored
2
.github/workflows/benchmark.yml
vendored
@ -64,7 +64,7 @@ jobs:
|
||||
commit_id=$GITHUB_SHA
|
||||
fi
|
||||
commit_msg=$(git show -s --format=%s | cut -c1-70)
|
||||
python3 benchmark/benchmarks_entrypoint.py "huggingface/transformers" "$BRANCH_NAME" "$commit_id" "$commit_msg"
|
||||
python3 benchmark/benchmarks_entrypoint.py "$BRANCH_NAME" "$commit_id" "$commit_msg"
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
# Enable this to see debug logs
|
||||
|
70
.github/workflows/build-docker-images.yml
vendored
70
.github/workflows/build-docker-images.yml
vendored
@ -19,7 +19,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
latest-docker:
|
||||
name: "Latest PyTorch [dev]"
|
||||
name: "Latest PyTorch + TensorFlow [dev]"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
@ -63,14 +63,14 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
|
||||
title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
latest-torch-deepspeed-docker:
|
||||
name: "Latest PyTorch + DeepSpeed"
|
||||
runs-on:
|
||||
group: aws-g4dn-2xlarge-cache
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
@ -99,7 +99,7 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
@ -140,7 +140,7 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
@ -176,7 +176,7 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-doc-builder docker build
|
||||
title: 🤗 Results of the huggingface/transformers-doc-builder docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
@ -214,7 +214,7 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
@ -223,19 +223,19 @@ jobs:
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
@ -263,12 +263,14 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
latest-pytorch-deepspeed-amd:
|
||||
name: "PyTorch + DeepSpeed (AMD) [dev]"
|
||||
latest-tensorflow:
|
||||
name: "Latest TensorFlow [dev]"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
@ -285,6 +287,42 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-tensorflow-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-tensorflow-gpu
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
latest-pytorch-deepspeed-amd:
|
||||
name: "PyTorch + DeepSpeed (AMD) [dev]"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
@ -312,7 +350,7 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
@ -350,6 +388,6 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-quantization-latest-gpu build
|
||||
title: 🤗 Results of the transformers-quantization-latest-gpu build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
@ -42,7 +42,7 @@ jobs:
|
||||
nightly-torch-deepspeed-docker:
|
||||
name: "Nightly PyTorch + DeepSpeed"
|
||||
runs-on:
|
||||
group: aws-g4dn-2xlarge-cache
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
|
4
.github/workflows/build_documentation.yml
vendored
4
.github/workflows/build_documentation.yml
vendored
@ -18,10 +18,6 @@ jobs:
|
||||
notebook_folder: transformers_doc
|
||||
languages: ar de en es fr hi it ko pt tr zh ja te
|
||||
custom_container: huggingface/transformers-doc-builder
|
||||
# Temporary pin to work around datasets exception in the docbuilder.Remove after docker images and main have
|
||||
# the right dependencies (which **should** be the case by 2025-07-20). See
|
||||
# https://github.com/huggingface/transformers/actions/runs/16365952006/job/46243081358?pr=38545
|
||||
pre_command: uv pip install datasets>=2.15.0
|
||||
secrets:
|
||||
token: ${{ secrets.HUGGINGFACE_PUSH }}
|
||||
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
|
||||
|
6
.github/workflows/build_pr_documentation.yml
vendored
6
.github/workflows/build_pr_documentation.yml
vendored
@ -14,8 +14,4 @@ jobs:
|
||||
commit_sha: ${{ github.event.pull_request.head.sha }}
|
||||
pr_number: ${{ github.event.number }}
|
||||
package: transformers
|
||||
languages: en
|
||||
# Temporary pin to work around datasets exception in the docbuilder. Remove after docker images and main have
|
||||
# the right dependencies (which **should** be the case by 2025-07-20). See
|
||||
# https://github.com/huggingface/transformers/actions/runs/16365952006/job/46243081358?pr=38545
|
||||
pre_command: uv pip install datasets>=2.15.0
|
||||
languages: ar de en es fr hi it ko pt tr zh ja te
|
||||
|
25
.github/workflows/change_pr_to_draft.yml
vendored
Normal file
25
.github/workflows/change_pr_to_draft.yml
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
name: Change PR to draft
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, reopened]
|
||||
|
||||
jobs:
|
||||
convert_pr_to_draft:
|
||||
runs-on: ubuntu-22.04
|
||||
name: Convert PR to draft
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: write
|
||||
if: github.event.pull_request.draft == false
|
||||
steps:
|
||||
- name: Convert PR to draft
|
||||
shell: bash
|
||||
env:
|
||||
PR_NUMBER: ${{ github.event.number }}
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO: ${{ github.repository }}
|
||||
run: |
|
||||
echo $PR_NUMBER
|
||||
gh pr ready $PR_NUMBER --repo $REPO --undo
|
||||
gh pr comment $PR_NUMBER --repo $REPO --body "Hi 👋, thank you for opening this pull request! The pull request is converted to draft by default. The CI will be paused while the PR is in draft mode. When it is ready for review, please click the \`Ready for review\` button (at the bottom of the PR page). This will assign reviewers and trigger CI."
|
@ -9,18 +9,6 @@ on:
|
||||
start_sha:
|
||||
required: true
|
||||
type: string
|
||||
job:
|
||||
required: true
|
||||
type: string
|
||||
slack_report_channel:
|
||||
required: true
|
||||
type: string
|
||||
ci_event:
|
||||
required: true
|
||||
type: string
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
|
||||
env:
|
||||
@ -38,128 +26,77 @@ env:
|
||||
|
||||
|
||||
jobs:
|
||||
check_new_failures:
|
||||
run_models_gpu:
|
||||
name: " "
|
||||
runs-on:
|
||||
group: aws-g5-4xlarge-cache
|
||||
group: aws-g4dn-2xlarge-cache
|
||||
container:
|
||||
image: ${{ inputs.docker }}
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: ci_results_${{ inputs.job }}
|
||||
path: /transformers/ci_results_${{ inputs.job }}
|
||||
|
||||
- name: Check file
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
|
||||
echo "`ci_results_${{ inputs.job }}/new_failures.json` exists, continue ..."
|
||||
echo "process=true" >> $GITHUB_ENV
|
||||
else
|
||||
echo "`ci_results_${{ inputs.job }}/new_failures.json` doesn't exist, abort."
|
||||
echo "process=false" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- uses: actions/download-artifact@v4
|
||||
if: ${{ env.process == 'true' }}
|
||||
with:
|
||||
pattern: setup_values*
|
||||
path: setup_values
|
||||
merge-multiple: true
|
||||
|
||||
- name: Prepare some setup values
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
if [ -f setup_values/prev_workflow_run_id.txt ]; then
|
||||
echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
|
||||
else
|
||||
echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
if [ -f setup_values/other_workflow_run_id.txt ]; then
|
||||
echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
|
||||
else
|
||||
echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
|
||||
fi
|
||||
name: ci_results_run_models_gpu
|
||||
path: /transformers/ci_results_run_models_gpu
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Get target commit
|
||||
working-directory: /transformers/utils
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV
|
||||
echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV
|
||||
|
||||
- name: Checkout to `start_sha`
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: git fetch && git checkout ${{ inputs.start_sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: pip freeze
|
||||
|
||||
- name: Check failed tests
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit.json
|
||||
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
|
||||
|
||||
- name: Show results
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
ls -l new_failures_with_bad_commit.json
|
||||
cat new_failures_with_bad_commit.json
|
||||
ls -l new_model_failures_with_bad_commit.json
|
||||
cat new_model_failures_with_bad_commit.json
|
||||
|
||||
- name: Checkout back
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
git checkout ${{ inputs.start_sha }}
|
||||
|
||||
- name: Process report
|
||||
shell: bash
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
env:
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
JOB_NAME: ${{ inputs.job }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
run: |
|
||||
python3 utils/process_bad_commit_report.py
|
||||
|
||||
- name: Process report
|
||||
shell: bash
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
env:
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
JOB_NAME: ${{ inputs.job }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
run: |
|
||||
{
|
||||
echo 'REPORT_TEXT<<EOF'
|
||||
@ -167,31 +104,17 @@ jobs:
|
||||
echo EOF
|
||||
} >> "$GITHUB_ENV"
|
||||
|
||||
- name: Prepare Slack report title
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
pip install slack_sdk
|
||||
echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
|
||||
|
||||
- name: Send processed report
|
||||
if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
|
||||
if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
|
||||
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
|
||||
with:
|
||||
# Slack channel id, channel name, or user id to post message.
|
||||
# See also: https://api.slack.com/methods/chat.postMessage#channels
|
||||
channel-id: '#${{ inputs.slack_report_channel }}'
|
||||
channel-id: '#transformers-ci-feedback-tests'
|
||||
# For posting a rich message using Block Kit
|
||||
payload: |
|
||||
{
|
||||
"blocks": [
|
||||
{
|
||||
"type": "header",
|
||||
"text": {
|
||||
"type": "plain_text",
|
||||
"text": "${{ env.title }}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "section",
|
||||
"text": {
|
2
.github/workflows/doctest_job.yml
vendored
2
.github/workflows/doctest_job.yml
vendored
@ -28,7 +28,7 @@ jobs:
|
||||
matrix:
|
||||
split_keys: ${{ fromJson(inputs.split_keys) }}
|
||||
runs-on:
|
||||
group: aws-g5-4xlarge-cache
|
||||
group: aws-g4dn-2xlarge-cache
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
|
2
.github/workflows/doctests.yml
vendored
2
.github/workflows/doctests.yml
vendored
@ -15,7 +15,7 @@ jobs:
|
||||
setup:
|
||||
name: Setup
|
||||
runs-on:
|
||||
group: aws-g5-4xlarge-cache
|
||||
group: aws-g4dn-2xlarge-cache
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
|
157
.github/workflows/get-pr-info.yml
vendored
157
.github/workflows/get-pr-info.yml
vendored
@ -1,157 +0,0 @@
|
||||
name: Get PR commit SHA
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
pr_number:
|
||||
required: true
|
||||
type: string
|
||||
outputs:
|
||||
PR_HEAD_REPO_FULL_NAME:
|
||||
description: "The full name of the repository from which the pull request is created"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
|
||||
PR_BASE_REPO_FULL_NAME:
|
||||
description: "The full name of the repository to which the pull request is created"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_FULL_NAME }}
|
||||
PR_HEAD_REPO_OWNER:
|
||||
description: "The owner of the repository from which the pull request is created"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}
|
||||
PR_BASE_REPO_OWNER:
|
||||
description: "The owner of the repository to which the pull request is created"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_OWNER }}
|
||||
PR_HEAD_REPO_NAME:
|
||||
description: "The name of the repository from which the pull request is created"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}
|
||||
PR_BASE_REPO_NAME:
|
||||
description: "The name of the repository to which the pull request is created"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_NAME }}
|
||||
PR_HEAD_REF:
|
||||
description: "The branch name of the pull request in the head repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REF }}
|
||||
PR_BASE_REF:
|
||||
description: "The branch name in the base repository (to merge into)"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REF }}
|
||||
PR_HEAD_SHA:
|
||||
description: "The head sha of the pull request branch in the head repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_SHA }}
|
||||
PR_BASE_SHA:
|
||||
description: "The head sha of the target branch in the base repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_BASE_SHA }}
|
||||
PR_MERGE_COMMIT_SHA:
|
||||
description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
|
||||
PR_HEAD_COMMIT_DATE:
|
||||
description: "The date of the head sha of the pull request branch in the head repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }}
|
||||
PR_MERGE_COMMIT_DATE:
|
||||
description: "The date of the merge commit for the pull request (created by GitHub) in the base repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
|
||||
PR_HEAD_COMMIT_TIMESTAMP:
|
||||
description: "The timestamp of the head sha of the pull request branch in the head repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_TIMESTAMP }}
|
||||
PR_MERGE_COMMIT_TIMESTAMP:
|
||||
description: "The timestamp of the merge commit for the pull request (created by GitHub) in the base repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
|
||||
PR:
|
||||
description: "The PR"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR }}
|
||||
PR_FILES:
|
||||
description: "The files touched in the PR"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_FILES }}
|
||||
|
||||
|
||||
jobs:
|
||||
get-pr-info:
|
||||
runs-on: ubuntu-22.04
|
||||
name: Get PR commit SHA better
|
||||
outputs:
|
||||
PR_HEAD_REPO_FULL_NAME: ${{ steps.pr_info.outputs.head_repo_full_name }}
|
||||
PR_BASE_REPO_FULL_NAME: ${{ steps.pr_info.outputs.base_repo_full_name }}
|
||||
PR_HEAD_REPO_OWNER: ${{ steps.pr_info.outputs.head_repo_owner }}
|
||||
PR_BASE_REPO_OWNER: ${{ steps.pr_info.outputs.base_repo_owner }}
|
||||
PR_HEAD_REPO_NAME: ${{ steps.pr_info.outputs.head_repo_name }}
|
||||
PR_BASE_REPO_NAME: ${{ steps.pr_info.outputs.base_repo_name }}
|
||||
PR_HEAD_REF: ${{ steps.pr_info.outputs.head_ref }}
|
||||
PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }}
|
||||
PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }}
|
||||
PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }}
|
||||
PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }}
|
||||
PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }}
|
||||
PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }}
|
||||
PR_HEAD_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.head_commit_timestamp }}
|
||||
PR_MERGE_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.merge_commit_timestamp }}
|
||||
PR: ${{ steps.pr_info.outputs.pr }}
|
||||
PR_FILES: ${{ steps.pr_info.outputs.files }}
|
||||
if: ${{ inputs.pr_number != '' }}
|
||||
steps:
|
||||
- name: Extract PR details
|
||||
id: pr_info
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
const { data: pr } = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: ${{ inputs.pr_number }}
|
||||
});
|
||||
|
||||
const { data: head_commit } = await github.rest.repos.getCommit({
|
||||
owner: pr.head.repo.owner.login,
|
||||
repo: pr.head.repo.name,
|
||||
ref: pr.head.ref
|
||||
});
|
||||
|
||||
const { data: merge_commit } = await github.rest.repos.getCommit({
|
||||
owner: pr.base.repo.owner.login,
|
||||
repo: pr.base.repo.name,
|
||||
ref: pr.merge_commit_sha,
|
||||
});
|
||||
|
||||
const { data: files } = await github.rest.pulls.listFiles({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: ${{ inputs.pr_number }}
|
||||
});
|
||||
|
||||
core.setOutput('head_repo_full_name', pr.head.repo.full_name);
|
||||
core.setOutput('base_repo_full_name', pr.base.repo.full_name);
|
||||
core.setOutput('head_repo_owner', pr.head.repo.owner.login);
|
||||
core.setOutput('base_repo_owner', pr.base.repo.owner.login);
|
||||
core.setOutput('head_repo_name', pr.head.repo.name);
|
||||
core.setOutput('base_repo_name', pr.base.repo.name);
|
||||
core.setOutput('head_ref', pr.head.ref);
|
||||
core.setOutput('base_ref', pr.base.ref);
|
||||
core.setOutput('head_sha', pr.head.sha);
|
||||
core.setOutput('base_sha', pr.base.sha);
|
||||
core.setOutput('merge_commit_sha', pr.merge_commit_sha);
|
||||
core.setOutput('pr', pr);
|
||||
|
||||
core.setOutput('head_commit_date', head_commit.commit.committer.date);
|
||||
core.setOutput('merge_commit_date', merge_commit.commit.committer.date);
|
||||
|
||||
core.setOutput('files', files);
|
||||
|
||||
console.log('PR head commit:', {
|
||||
head_commit: head_commit,
|
||||
commit: head_commit.commit,
|
||||
date: head_commit.commit.committer.date
|
||||
});
|
||||
|
||||
console.log('PR merge commit:', {
|
||||
merge_commit: merge_commit,
|
||||
commit: merge_commit.commit,
|
||||
date: merge_commit.commit.committer.date
|
||||
});
|
||||
|
||||
- name: Convert dates to timestamps
|
||||
id: get_timestamps
|
||||
run: |
|
||||
head_commit_date=${{ steps.pr_info.outputs.head_commit_date }}
|
||||
merge_commit_date=${{ steps.pr_info.outputs.merge_commit_date }}
|
||||
echo $head_commit_date
|
||||
echo $merge_commit_date
|
||||
head_commit_timestamp=$(date -d "$head_commit_date" +%s)
|
||||
merge_commit_timestamp=$(date -d "$merge_commit_date" +%s)
|
||||
echo $head_commit_timestamp
|
||||
echo $merge_commit_timestamp
|
||||
echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT
|
||||
echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT
|
36
.github/workflows/get-pr-number.yml
vendored
36
.github/workflows/get-pr-number.yml
vendored
@ -1,36 +0,0 @@
|
||||
name: Get PR number
|
||||
on:
|
||||
workflow_call:
|
||||
outputs:
|
||||
PR_NUMBER:
|
||||
description: "The extracted PR number"
|
||||
value: ${{ jobs.get-pr-number.outputs.PR_NUMBER }}
|
||||
|
||||
jobs:
|
||||
get-pr-number:
|
||||
runs-on: ubuntu-22.04
|
||||
name: Get PR number
|
||||
outputs:
|
||||
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
|
||||
steps:
|
||||
- name: Get PR number
|
||||
shell: bash
|
||||
run: |
|
||||
if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
|
||||
echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
|
||||
elif [[ "${{ github.event.pull_request.number }}" != "" ]]; then
|
||||
echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
|
||||
elif [[ "${{ github.event.pull_request }}" != "" ]]; then
|
||||
echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_ENV
|
||||
else
|
||||
echo "PR_NUMBER=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Check PR number
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ env.PR_NUMBER }}"
|
||||
|
||||
- name: Set PR number
|
||||
id: set_pr_number
|
||||
run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
|
30
.github/workflows/model_jobs.yml
vendored
30
.github/workflows/model_jobs.yml
vendored
@ -12,16 +12,12 @@ on:
|
||||
slice_id:
|
||||
required: true
|
||||
type: number
|
||||
runner_map:
|
||||
required: false
|
||||
runner:
|
||||
required: true
|
||||
type: string
|
||||
docker:
|
||||
required: true
|
||||
type: string
|
||||
report_name_prefix:
|
||||
required: false
|
||||
default: run_models_gpu
|
||||
type: string
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
@ -45,7 +41,7 @@ jobs:
|
||||
matrix:
|
||||
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
|
||||
runs-on:
|
||||
group: ${{ fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type] }}
|
||||
group: '${{ inputs.machine_type }}'
|
||||
container:
|
||||
image: ${{ inputs.docker }}
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
@ -107,9 +103,9 @@ jobs:
|
||||
run: |
|
||||
echo "${{ inputs.machine_type }}"
|
||||
|
||||
if [ "${{ inputs.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ inputs.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ inputs.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ inputs.machine_type }}
|
||||
@ -120,23 +116,23 @@ jobs:
|
||||
|
||||
- name: Run all tests on GPU
|
||||
working-directory: /transformers
|
||||
run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
||||
run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||
|
||||
- name: Run test
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
|
||||
echo "hello" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt
|
||||
echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports"
|
||||
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
||||
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
|
||||
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
|
||||
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
||||
|
128
.github/workflows/model_jobs_amd.yml
vendored
Normal file
128
.github/workflows/model_jobs_amd.yml
vendored
Normal file
@ -0,0 +1,128 @@
|
||||
name: model jobs
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
folder_slices:
|
||||
required: true
|
||||
type: string
|
||||
machine_type:
|
||||
required: true
|
||||
type: string
|
||||
slice_id:
|
||||
required: true
|
||||
type: number
|
||||
runner:
|
||||
required: true
|
||||
type: string
|
||||
docker:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
RUN_SLOW: yes
|
||||
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
|
||||
# This token is created under the bot `hf-transformers-bot`.
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
CUDA_VISIBLE_DEVICES: 0,1
|
||||
|
||||
jobs:
|
||||
run_models_gpu:
|
||||
name: " "
|
||||
strategy:
|
||||
max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
|
||||
runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
|
||||
container:
|
||||
image: ${{ inputs.docker }}
|
||||
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Echo input and matrix info
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ inputs.folder_slices }}"
|
||||
echo "${{ matrix.folders }}"
|
||||
echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
|
||||
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||
# set the artifact folder names (because the character `/` is not allowed).
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Update / Install some packages (for Past CI)
|
||||
if: ${{ contains(inputs.docker, '-past-') }}
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pip install -U datasets
|
||||
|
||||
- name: Update / Install some packages (for Past CI)
|
||||
if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
- name: ROCM-SMI
|
||||
run: |
|
||||
rocm-smi
|
||||
|
||||
- name: ROCM-INFO
|
||||
run: |
|
||||
rocminfo | grep "Agent" -A 14
|
||||
|
||||
- name: Show ROCR environment
|
||||
run: |
|
||||
echo "ROCR: $ROCR_VISIBLE_DEVICES"
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all tests on GPU
|
||||
working-directory: /transformers
|
||||
run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||
|
||||
- name: Run test
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
||||
echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
|
||||
echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
121
.github/workflows/model_jobs_intel_gaudi.yml
vendored
121
.github/workflows/model_jobs_intel_gaudi.yml
vendored
@ -1,121 +0,0 @@
|
||||
name: model jobs
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
folder_slices:
|
||||
required: true
|
||||
type: string
|
||||
slice_id:
|
||||
required: true
|
||||
type: number
|
||||
runner:
|
||||
required: true
|
||||
type: string
|
||||
machine_type:
|
||||
required: true
|
||||
type: string
|
||||
report_name_prefix:
|
||||
required: false
|
||||
default: run_models_gpu
|
||||
type: string
|
||||
|
||||
env:
|
||||
RUN_SLOW: yes
|
||||
PT_HPU_LAZY_MODE: 0
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
PT_ENABLE_INT64_SUPPORT: 1
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
HF_HOME: /mnt/cache/.cache/huggingface
|
||||
|
||||
jobs:
|
||||
run_models_gpu:
|
||||
name: " "
|
||||
strategy:
|
||||
max-parallel: 8
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
|
||||
runs-on:
|
||||
group: ${{ inputs.runner }}
|
||||
container:
|
||||
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
|
||||
options: --runtime=habana
|
||||
-v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
|
||||
--env OMPI_MCA_btl_vader_single_copy_mechanism=none
|
||||
--env HABANA_VISIBLE_DEVICES
|
||||
--env HABANA_VISIBLE_MODULES
|
||||
--cap-add=sys_nice
|
||||
--shm-size=64G
|
||||
steps:
|
||||
- name: Echo input and matrix info
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ inputs.folder_slices }}"
|
||||
echo "${{ matrix.folders }}"
|
||||
echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
|
||||
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn
|
||||
|
||||
- name: HL-SMI
|
||||
run: |
|
||||
hl-smi
|
||||
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
|
||||
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
|
||||
|
||||
- name: Environment
|
||||
run: python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: pip freeze
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
shell: bash
|
||||
run: |
|
||||
if [ "${{ inputs.machine_type }}" = "1gaudi" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ inputs.machine_type }}" = "2gaudi" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ inputs.machine_type }}
|
||||
fi
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Run all tests on Gaudi
|
||||
run: python3 -m pytest -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||
|
||||
- name: Run test
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
|
||||
echo "hello" > reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt
|
||||
echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports"
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
|
||||
path: reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
|
@ -59,7 +59,7 @@ jobs:
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": "<https://github.com/huggingface/transformers/commit/${{ env.COMMIT_SHA }}|New model: ${{ env.NEW_MODEL }}> GH_ArthurZucker, GH_lysandrejik, GH_ydshieh\ncommit SHA: ${{ env.COMMIT_SHA }}"
|
||||
"text": "<https://github.com/huggingface/transformers/commit/${{ env.COMMIT_SHA }}|New model: ${{ env.NEW_MODEL }}> GH_ArthurZucker, GH_lysandrejik, GH_ydshieh"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
18
.github/workflows/pr-style-bot.yml
vendored
18
.github/workflows/pr-style-bot.yml
vendored
@ -1,18 +0,0 @@
|
||||
# To run this bot, comment "@bot /style" on a PR
|
||||
name: Style Bot
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
style:
|
||||
uses: huggingface/huggingface_hub/.github/workflows/style-bot-action.yml@main
|
||||
with:
|
||||
python_quality_dependencies: "[quality]"
|
||||
style_command_type: "default"
|
||||
secrets:
|
||||
bot_token: ${{ secrets.HF_STYLE_BOT_ACTION }}
|
199
.github/workflows/pr_run_slow_ci.yml
vendored
199
.github/workflows/pr_run_slow_ci.yml
vendored
@ -1,199 +0,0 @@
|
||||
name: PR slow CI
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened]
|
||||
|
||||
jobs:
|
||||
get-pr-number:
|
||||
name: Get PR number
|
||||
uses: ./.github/workflows/get-pr-number.yml
|
||||
|
||||
get-pr-info:
|
||||
name: Get PR commit SHA
|
||||
needs: get-pr-number
|
||||
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
|
||||
uses: ./.github/workflows/get-pr-info.yml
|
||||
with:
|
||||
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
|
||||
|
||||
# We only need to verify the timestamp if the workflow is triggered by `issue_comment`.
|
||||
verity_pr_commit:
|
||||
name: Verity PR commit corresponds to a specific event by comparing timestamps
|
||||
if: ${{ github.event.comment.created_at != '' }}
|
||||
runs-on: ubuntu-22.04
|
||||
needs: get-pr-info
|
||||
env:
|
||||
COMMENT_DATE: ${{ github.event.comment.created_at }}
|
||||
PR_MERGE_COMMIT_DATE: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
|
||||
PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
|
||||
steps:
|
||||
- run: |
|
||||
COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
|
||||
echo "COMMENT_DATE: $COMMENT_DATE"
|
||||
echo "PR_MERGE_COMMIT_DATE: $PR_MERGE_COMMIT_DATE"
|
||||
echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
|
||||
echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
|
||||
if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
|
||||
echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
|
||||
exit -1;
|
||||
fi
|
||||
|
||||
get-jobs:
|
||||
name: Get test files to run
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [get-pr-number, get-pr-info]
|
||||
outputs:
|
||||
jobs: ${{ steps.get_jobs.outputs.jobs_to_run }}
|
||||
steps:
|
||||
- name: Get repository content
|
||||
id: repo_content
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
const { data: tests_dir } = await github.rest.repos.getContent({
|
||||
owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
|
||||
repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
|
||||
path: 'tests',
|
||||
ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
|
||||
});
|
||||
|
||||
const { data: tests_models_dir } = await github.rest.repos.getContent({
|
||||
owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
|
||||
repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
|
||||
path: 'tests/models',
|
||||
ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
|
||||
});
|
||||
|
||||
const { data: tests_quantization_dir } = await github.rest.repos.getContent({
|
||||
owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
|
||||
repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
|
||||
path: 'tests/quantization',
|
||||
ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
|
||||
});
|
||||
|
||||
core.setOutput('tests_dir', tests_dir);
|
||||
core.setOutput('tests_models_dir', tests_models_dir);
|
||||
core.setOutput('tests_quantization_dir', tests_quantization_dir);
|
||||
|
||||
# This checkout to the main branch
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: "0"
|
||||
|
||||
- name: Write pr_files file
|
||||
run: |
|
||||
cat > pr_files.txt << 'EOF'
|
||||
${{ needs.get-pr-info.outputs.PR_FILES }}
|
||||
EOF
|
||||
|
||||
- name: Write tests_dir file
|
||||
run: |
|
||||
cat > tests_dir.txt << 'EOF'
|
||||
${{ steps.repo_content.outputs.tests_dir }}
|
||||
EOF
|
||||
|
||||
- name: Write tests_models_dir file
|
||||
run: |
|
||||
cat > tests_models_dir.txt << 'EOF'
|
||||
${{ steps.repo_content.outputs.tests_models_dir }}
|
||||
EOF
|
||||
|
||||
- name: Write tests_quantization_dir file
|
||||
run: |
|
||||
cat > tests_quantization_dir.txt << 'EOF'
|
||||
${{ steps.repo_content.outputs.tests_quantization_dir }}
|
||||
EOF
|
||||
|
||||
- name: Run script to get jobs to run
|
||||
id: get_jobs
|
||||
run: |
|
||||
python utils/get_pr_run_slow_jobs.py | tee output.txt
|
||||
echo "jobs_to_run: $(tail -n 1 output.txt)"
|
||||
echo "jobs_to_run=$(tail -n 1 output.txt)" >> $GITHUB_OUTPUT
|
||||
|
||||
send_comment:
|
||||
# Will delete the previous comment and send a new one if:
|
||||
# - either the content is changed
|
||||
# - or the previous comment is 30 minutes or more old
|
||||
name: Send a comment to suggest jobs to run
|
||||
if: ${{ needs.get-jobs.outputs.jobs != '' }}
|
||||
needs: [get-pr-number, get-jobs]
|
||||
permissions:
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Check and update comment if needed
|
||||
uses: actions/github-script@v7
|
||||
env:
|
||||
BODY: "\n\nrun-slow: ${{ needs.get-jobs.outputs.jobs }}"
|
||||
with:
|
||||
script: |
|
||||
const prNumber = ${{ needs.get-pr-number.outputs.PR_NUMBER }};
|
||||
const commentPrefix = "**[For maintainers]** Suggested jobs to run (before merge)";
|
||||
const thirtyMinutesAgo = new Date(Date.now() - 30 * 60 * 1000); // 30 minutes ago
|
||||
const newBody = `${commentPrefix}${process.env.BODY}`;
|
||||
|
||||
// Get all comments on the PR
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber
|
||||
});
|
||||
|
||||
// Find existing comments that start with our prefix
|
||||
const existingComments = comments.filter(comment =>
|
||||
comment.user.login === 'github-actions[bot]' &&
|
||||
comment.body.startsWith(commentPrefix)
|
||||
);
|
||||
|
||||
let shouldCreateNewComment = true;
|
||||
let commentsToDelete = [];
|
||||
|
||||
if (existingComments.length > 0) {
|
||||
// Get the most recent comment
|
||||
const mostRecentComment = existingComments
|
||||
.sort((a, b) => new Date(b.created_at) - new Date(a.created_at))[0];
|
||||
|
||||
const commentDate = new Date(mostRecentComment.created_at);
|
||||
const isOld = commentDate < thirtyMinutesAgo;
|
||||
const isDifferentContent = mostRecentComment.body !== newBody;
|
||||
|
||||
console.log(`Most recent comment created: ${mostRecentComment.created_at}`);
|
||||
console.log(`Is older than 30 minutes: ${isOld}`);
|
||||
console.log(`Has different content: ${isDifferentContent}`);
|
||||
|
||||
if (isOld || isDifferentContent) {
|
||||
// Delete all existing comments and create new one
|
||||
commentsToDelete = existingComments;
|
||||
console.log(`Will delete ${commentsToDelete.length} existing comment(s) and create new one`);
|
||||
} else {
|
||||
// Content is same and comment is recent, skip
|
||||
shouldCreateNewComment = false;
|
||||
console.log('Comment is recent and content unchanged, skipping update');
|
||||
}
|
||||
} else {
|
||||
console.log('No existing comments found, will create new one');
|
||||
}
|
||||
|
||||
// Delete old comments if needed
|
||||
for (const comment of commentsToDelete) {
|
||||
console.log(`Deleting comment #${comment.id} (created: ${comment.created_at})`);
|
||||
await github.rest.issues.deleteComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: comment.id
|
||||
});
|
||||
}
|
||||
|
||||
// Create new comment if needed
|
||||
if (shouldCreateNewComment) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: newBody
|
||||
});
|
||||
console.log('✅ New comment created');
|
||||
} else {
|
||||
console.log('ℹ️ No comment update needed');
|
||||
}
|
16
.github/workflows/self-comment-ci.yml
vendored
16
.github/workflows/self-comment-ci.yml
vendored
@ -29,7 +29,7 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
name: Get PR number
|
||||
# For security: only allow team members to run
|
||||
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu", "ivarflakstad", "stevhliu", "ebezzam"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
|
||||
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
|
||||
outputs:
|
||||
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
|
||||
steps:
|
||||
@ -145,7 +145,7 @@ jobs:
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
MODELS: ${{ needs.get-tests.outputs.models }}
|
||||
BODY: "\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
|
||||
BODY: "This comment contains run-slow, running the specified jobs:\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
|
||||
run: |
|
||||
gh api \
|
||||
--method POST \
|
||||
@ -185,7 +185,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.get-tests.outputs.models) }}
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -239,9 +239,9 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
@ -292,7 +292,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -338,9 +338,9 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
|
26
.github/workflows/self-push.yml
vendored
26
.github/workflows/self-push.yml
vendored
@ -31,7 +31,7 @@ jobs:
|
||||
name: Setup
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -131,7 +131,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
machine_type: [aws-g5-4xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -169,9 +169,9 @@ jobs:
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
@ -244,7 +244,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
machine_type: [aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -282,9 +282,9 @@ jobs:
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
@ -357,7 +357,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -395,9 +395,9 @@ jobs:
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
@ -467,7 +467,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -505,9 +505,9 @@ jobs:
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
|
55
.github/workflows/self-scheduled-amd-mi210-caller.yml
vendored
Normal file
55
.github/workflows/self-scheduled-amd-mi210-caller.yml
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
name: Self-hosted runner (AMD mi210 scheduled CI caller)
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
|
||||
branches: ["main"]
|
||||
types: [completed]
|
||||
push:
|
||||
branches:
|
||||
- run_amd_scheduled_ci_caller*
|
||||
|
||||
jobs:
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-amd"
|
||||
runner: mi210
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi210
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
name: Torch pipeline CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-amd"
|
||||
runner: mi210
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi210
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
name: Example CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-amd"
|
||||
runner: mi210
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi210
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
name: DeepSpeed CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-amd"
|
||||
runner: mi210
|
||||
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi210
|
||||
secrets: inherit
|
@ -15,11 +15,10 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-amd"
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner: mi250
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi250
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
@ -27,11 +26,10 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-amd"
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner: mi250
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi250
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
@ -39,11 +37,10 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-amd"
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner: mi250
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi250
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
@ -51,9 +48,8 @@ jobs:
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-amd"
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner: mi250
|
||||
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi250
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
@ -1,63 +0,0 @@
|
||||
name: Self-hosted runner scale set (AMD mi300 scheduled CI caller)
|
||||
|
||||
# Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
|
||||
# For example, 1gpu scale set: amd-mi300-ci-1gpu
|
||||
# 2gpu scale set: amd-mi300-ci-2gpu
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
|
||||
branches: ["main"]
|
||||
types: [completed]
|
||||
push:
|
||||
branches:
|
||||
- run_amd_scheduled_ci_caller*
|
||||
|
||||
jobs:
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi300-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi300
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
name: Torch pipeline CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi300-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi300
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
name: Example CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi300-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi300
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
name: DeepSpeed CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi300-ci
|
||||
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi300
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
@ -1,63 +0,0 @@
|
||||
name: Self-hosted runner scale set (AMD mi325 scheduled CI caller)
|
||||
|
||||
# Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
|
||||
# For example, 1gpu scale set: amd-mi325-ci-1gpu
|
||||
# 2gpu scale set: amd-mi325-ci-2gpu
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
|
||||
branches: ["main"]
|
||||
types: [completed]
|
||||
push:
|
||||
branches:
|
||||
- run_amd_scheduled_ci_caller*
|
||||
|
||||
jobs:
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi325-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi325
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
name: Torch pipeline CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi325-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi325
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
name: Example CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi325-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi325
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
name: DeepSpeed CI
|
||||
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi325-ci
|
||||
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi325
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
69
.github/workflows/self-scheduled-caller.yml
vendored
69
.github/workflows/self-scheduled-caller.yml
vendored
@ -8,52 +8,17 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- run_scheduled_ci*
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
prev_workflow_run_id:
|
||||
description: 'previous workflow run id to compare'
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
other_workflow_run_id:
|
||||
description: 'other workflow run id to compare'
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
|
||||
|
||||
# Used for `push` to easily modify the target workflow runs to compare against
|
||||
env:
|
||||
prev_workflow_run_id: ""
|
||||
other_workflow_run_id: ""
|
||||
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
name: Setup
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
mkdir "setup_values"
|
||||
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
|
||||
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: setup_values
|
||||
path: setup_values
|
||||
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-models"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
@ -62,9 +27,20 @@ jobs:
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-pytorch-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
tf-pipeline:
|
||||
name: TF pipeline CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_pipelines_tf_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-pipeline-tf"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-tensorflow-gpu
|
||||
ci_event: Daily CI
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
@ -73,20 +49,9 @@ jobs:
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-examples"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
trainer-fsdp-ci:
|
||||
name: Trainer/FSDP CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_trainer_and_fsdp_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-training"
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
@ -94,11 +59,11 @@ jobs:
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-training"
|
||||
slack_report_channel: "#transformers-ci-daily-deepspeed"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
ci_event: Daily CI
|
||||
working-directory-prefix: /workspace
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
quantization-ci:
|
||||
@ -107,7 +72,7 @@ jobs:
|
||||
with:
|
||||
job: run_quantization_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-quantization"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-quantization-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
342
.github/workflows/self-scheduled-intel-gaudi.yml
vendored
342
.github/workflows/self-scheduled-intel-gaudi.yml
vendored
@ -1,342 +0,0 @@
|
||||
name: Self-hosted runner (scheduled-intel-gaudi)
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
job:
|
||||
required: true
|
||||
type: string
|
||||
slack_report_channel:
|
||||
required: true
|
||||
type: string
|
||||
runner_scale_set:
|
||||
required: true
|
||||
type: string
|
||||
ci_event:
|
||||
required: true
|
||||
type: string
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
NUM_SLICES: 2
|
||||
RUN_SLOW: yes
|
||||
PT_HPU_LAZY_MODE: 0
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
PT_ENABLE_INT64_SUPPORT: 1
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
HF_HOME: /mnt/cache/.cache/huggingface
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
|
||||
name: Setup
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
|
||||
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
|
||||
quantization_matrix: ${{ steps.set-matrix.outputs.quantization_matrix }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- id: set-matrix
|
||||
if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
|
||||
name: Identify models to test
|
||||
working-directory: tests
|
||||
run: |
|
||||
if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
|
||||
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
|
||||
elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
|
||||
echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- id: set-matrix-quantization
|
||||
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
|
||||
name: Identify quantization method to test
|
||||
working-directory: tests
|
||||
run: |
|
||||
echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
|
||||
|
||||
run_models_gpu:
|
||||
if: ${{ inputs.job == 'run_models_gpu' }}
|
||||
name: " "
|
||||
needs: setup
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [1gaudi, 2gaudi]
|
||||
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
|
||||
uses: ./.github/workflows/model_jobs_intel_gaudi.yml
|
||||
with:
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
machine_type: ${{ matrix.machine_type }}
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
|
||||
secrets: inherit
|
||||
|
||||
run_trainer_and_fsdp_gpu:
|
||||
if: ${{ inputs.job == 'run_trainer_and_fsdp_gpu' }}
|
||||
name: " "
|
||||
needs: setup
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [1gaudi, 2gaudi]
|
||||
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
|
||||
uses: ./.github/workflows/model_jobs_intel_gaudi.yml
|
||||
with:
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
machine_type: ${{ matrix.machine_type }}
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
|
||||
report_name_prefix: run_trainer_and_fsdp_gpu
|
||||
secrets: inherit
|
||||
|
||||
run_pipelines_torch_gpu:
|
||||
if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
|
||||
name: Pipelines
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [1gaudi, 2gaudi]
|
||||
runs-on:
|
||||
group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
|
||||
container:
|
||||
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
|
||||
options: --runtime=habana
|
||||
-v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
|
||||
--env OMPI_MCA_btl_vader_single_copy_mechanism=none
|
||||
--env HABANA_VISIBLE_DEVICES
|
||||
--env HABANA_VISIBLE_MODULES
|
||||
--cap-add=sys_nice
|
||||
--shm-size=64G
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
|
||||
|
||||
- name: HL-SMI
|
||||
run: |
|
||||
hl-smi
|
||||
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
|
||||
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
|
||||
|
||||
- name: Environment
|
||||
run: python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: pip freeze
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
shell: bash
|
||||
run: |
|
||||
if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Run all pipeline tests on Intel Gaudi
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: |
|
||||
cat reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
|
||||
path: reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
|
||||
|
||||
run_examples_gpu:
|
||||
if: ${{ inputs.job == 'run_examples_gpu' }}
|
||||
name: Examples directory
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [1gaudi]
|
||||
runs-on:
|
||||
group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
|
||||
container:
|
||||
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
|
||||
options: --runtime=habana
|
||||
-v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
|
||||
--env OMPI_MCA_btl_vader_single_copy_mechanism=none
|
||||
--env HABANA_VISIBLE_DEVICES
|
||||
--env HABANA_VISIBLE_MODULES
|
||||
--cap-add=sys_nice
|
||||
--shm-size=64G
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
|
||||
|
||||
- name: HL-SMI
|
||||
run: |
|
||||
hl-smi
|
||||
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
|
||||
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: |
|
||||
pip freeze
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
shell: bash
|
||||
run: |
|
||||
if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Run examples tests on Intel Gaudi
|
||||
run: |
|
||||
pip install -r examples/pytorch/_tests_requirements.txt
|
||||
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test"
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: |
|
||||
cat reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_examples_gpu_test_reports
|
||||
path: reports/${{ env.machine_type }}_run_examples_gpu_test_reports
|
||||
|
||||
run_torch_cuda_extensions_gpu:
|
||||
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
|
||||
name: Intel Gaudi deepspeed tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [1gaudi, 2gaudi]
|
||||
runs-on:
|
||||
group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
|
||||
container:
|
||||
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
|
||||
options: --runtime=habana
|
||||
-v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
|
||||
--env OMPI_MCA_btl_vader_single_copy_mechanism=none
|
||||
--env HABANA_VISIBLE_DEVICES
|
||||
--env HABANA_VISIBLE_MODULES
|
||||
--cap-add=sys_nice
|
||||
--shm-size=64G
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
|
||||
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0
|
||||
|
||||
- name: HL-SMI
|
||||
run: |
|
||||
hl-smi
|
||||
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
|
||||
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: |
|
||||
pip freeze
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
shell: bash
|
||||
run: |
|
||||
if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Run all deepspeed tests on intel Gaudi
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed -m "not not_device_test"
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: |
|
||||
cat reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
||||
path: reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
||||
|
||||
send_results:
|
||||
name: Slack Report
|
||||
needs:
|
||||
[
|
||||
setup,
|
||||
run_models_gpu,
|
||||
run_examples_gpu,
|
||||
run_torch_cuda_extensions_gpu,
|
||||
run_pipelines_torch_gpu,
|
||||
run_trainer_and_fsdp_gpu,
|
||||
]
|
||||
if: ${{ always() }}
|
||||
uses: ./.github/workflows/slack-report.yml
|
||||
with:
|
||||
job: ${{ inputs.job }}
|
||||
setup_status: ${{ needs.setup.result }}
|
||||
slack_report_channel: ${{ inputs.slack_report_channel }}
|
||||
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
report_repo_id: ${{ inputs.report_repo_id }}
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
|
||||
secrets: inherit
|
@ -1,67 +0,0 @@
|
||||
name: Self-hosted runner (Intel Gaudi3 scheduled CI caller)
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "17 2 * * *"
|
||||
|
||||
jobs:
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
|
||||
with:
|
||||
job: run_models_gpu
|
||||
ci_event: Scheduled CI (Intel) - Gaudi3
|
||||
runner_scale_set: itac-bm-emr-gaudi3-dell
|
||||
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
|
||||
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
|
||||
|
||||
secrets: inherit
|
||||
|
||||
pipeline-ci:
|
||||
name: Pipeline CI
|
||||
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
ci_event: Scheduled CI (Intel) - Gaudi3
|
||||
runner_scale_set: itac-bm-emr-gaudi3-dell
|
||||
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
|
||||
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
|
||||
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
name: Example CI
|
||||
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
ci_event: Scheduled CI (Intel) - Gaudi3
|
||||
runner_scale_set: itac-bm-emr-gaudi3-dell
|
||||
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
|
||||
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
|
||||
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
name: DeepSpeed CI
|
||||
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
ci_event: Scheduled CI (Intel) - Gaudi3
|
||||
runner_scale_set: itac-bm-emr-gaudi3-dell
|
||||
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
|
||||
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
|
||||
|
||||
secrets: inherit
|
||||
|
||||
trainer-fsdp-ci:
|
||||
name: Trainer/FSDP CI
|
||||
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
|
||||
with:
|
||||
job: run_trainer_and_fsdp_gpu
|
||||
ci_event: Scheduled CI (Intel) - Gaudi3
|
||||
runner_scale_set: itac-bm-emr-gaudi3-dell
|
||||
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
|
||||
report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
|
||||
secrets: inherit
|
158
.github/workflows/self-scheduled.yml
vendored
158
.github/workflows/self-scheduled.yml
vendored
@ -15,6 +15,9 @@ on:
|
||||
slack_report_channel:
|
||||
required: true
|
||||
type: string
|
||||
runner:
|
||||
required: true
|
||||
type: string
|
||||
docker:
|
||||
required: true
|
||||
type: string
|
||||
@ -25,10 +28,6 @@ on:
|
||||
default: ''
|
||||
required: false
|
||||
type: string
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
@ -46,11 +45,11 @@ env:
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu", "run_quantization_torch_gpu"]'), inputs.job)
|
||||
if: contains(fromJSON('["run_models_gpu", "run_quantization_torch_gpu"]'), inputs.job)
|
||||
name: Setup
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -59,7 +58,6 @@ jobs:
|
||||
outputs:
|
||||
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
|
||||
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
|
||||
runner_map: ${{ steps.set-matrix.outputs.runner_map }}
|
||||
quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
|
||||
steps:
|
||||
- name: Update clone
|
||||
@ -79,18 +77,12 @@ jobs:
|
||||
run: pip freeze
|
||||
|
||||
- id: set-matrix
|
||||
if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
|
||||
if: ${{ inputs.job == 'run_models_gpu' }}
|
||||
name: Identify models to test
|
||||
working-directory: /transformers/tests
|
||||
run: |
|
||||
if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
|
||||
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
|
||||
echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT
|
||||
elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
|
||||
echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
|
||||
|
||||
- id: set-matrix-quantization
|
||||
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
|
||||
@ -110,43 +102,24 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
|
||||
uses: ./.github/workflows/model_jobs.yml
|
||||
with:
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
machine_type: ${{ matrix.machine_type }}
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
runner_map: ${{ needs.setup.outputs.runner_map }}
|
||||
runner: ${{ inputs.runner }}
|
||||
docker: ${{ inputs.docker }}
|
||||
secrets: inherit
|
||||
|
||||
run_trainer_and_fsdp_gpu:
|
||||
if: ${{ inputs.job == 'run_trainer_and_fsdp_gpu' }}
|
||||
name: " "
|
||||
needs: setup
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
slice_id: [0, 1]
|
||||
uses: ./.github/workflows/model_jobs.yml
|
||||
with:
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
machine_type: ${{ matrix.machine_type }}
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
runner_map: ${{ needs.setup.outputs.runner_map }}
|
||||
docker: ${{ inputs.docker }}
|
||||
report_name_prefix: run_trainer_and_fsdp_gpu
|
||||
secrets: inherit
|
||||
|
||||
run_pipelines_torch_gpu:
|
||||
if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
|
||||
name: PyTorch pipelines
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -180,9 +153,9 @@ jobs:
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
@ -208,13 +181,82 @@ jobs:
|
||||
name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
|
||||
|
||||
run_pipelines_tf_gpu:
|
||||
if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
|
||||
name: TensorFlow pipelines
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-tensorflow-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Run all pipeline tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ always() }}
|
||||
run: |
|
||||
cat /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
|
||||
|
||||
run_examples_gpu:
|
||||
if: ${{ inputs.job == 'run_examples_gpu' }}
|
||||
name: Examples directory
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -248,9 +290,9 @@ jobs:
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
@ -283,7 +325,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -340,14 +382,14 @@ jobs:
|
||||
run: pip freeze
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
@ -382,7 +424,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
@ -425,9 +467,9 @@ jobs:
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
@ -499,8 +541,8 @@ jobs:
|
||||
needs: [
|
||||
setup,
|
||||
run_models_gpu,
|
||||
run_trainer_and_fsdp_gpu,
|
||||
run_pipelines_torch_gpu,
|
||||
run_pipelines_tf_gpu,
|
||||
run_examples_gpu,
|
||||
run_torch_cuda_extensions_gpu,
|
||||
run_quantization_torch_gpu,
|
||||
@ -517,21 +559,15 @@ jobs:
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
report_repo_id: ${{ inputs.report_repo_id }}
|
||||
|
||||
secrets: inherit
|
||||
|
||||
check_new_failures:
|
||||
if: ${{ always() && inputs.ci_event == 'Daily CI' && needs.send_results.result == 'success' }}
|
||||
name: Check new failures
|
||||
check_new_model_failures:
|
||||
if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job == 'run_models_gpu' && needs.send_results.result == 'success' }}
|
||||
name: Check new model failures
|
||||
needs: send_results
|
||||
uses: ./.github/workflows/check_failed_tests.yml
|
||||
uses: ./.github/workflows/check_failed_model_tests.yml
|
||||
with:
|
||||
docker: ${{ inputs.docker }}
|
||||
start_sha: ${{ github.sha }}
|
||||
job: ${{ inputs.job }}
|
||||
slack_report_channel: ${{ inputs.slack_report_channel }}
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
report_repo_id: ${{ inputs.report_repo_id }}
|
||||
|
||||
secrets: inherit
|
||||
|
58
.github/workflows/slack-report.yml
vendored
58
.github/workflows/slack-report.yml
vendored
@ -21,9 +21,6 @@ on:
|
||||
ci_event:
|
||||
required: true
|
||||
type: string
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
@ -42,23 +39,8 @@ jobs:
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
|
||||
- name: Prepare some setup values
|
||||
run: |
|
||||
if [ -f setup_values/prev_workflow_run_id.txt ]; then
|
||||
echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
|
||||
else
|
||||
echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
if [ -f setup_values/other_workflow_run_id.txt ]; then
|
||||
echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
|
||||
else
|
||||
echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Send message to Slack
|
||||
shell: bash
|
||||
if: ${{ inputs.job != 'run_quantization_torch_gpu' }}
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
@ -68,22 +50,19 @@ jobs:
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: ${{ inputs.ci_event }}
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
|
||||
CI_TEST_JOB: ${{ inputs.job }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
||||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||
# For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
|
||||
# empty string, and the called script still get one argument (which is the emtpy string).
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install huggingface_hub
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
if [ "${{ inputs.quantization_matrix }}" != "" ]; then
|
||||
python utils/notification_service.py "${{ inputs.quantization_matrix }}"
|
||||
else
|
||||
python utils/notification_service.py "${{ inputs.folder_slices }}"
|
||||
fi
|
||||
python utils/notification_service.py "${{ inputs.folder_slices }}"
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
@ -91,3 +70,32 @@ jobs:
|
||||
with:
|
||||
name: ci_results_${{ inputs.job }}
|
||||
path: ci_results_${{ inputs.job }}
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack for quantization workflow
|
||||
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
|
||||
CI_EVENT: ${{ inputs.ci_event }}
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_TEST_JOB: ${{ inputs.job }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
# We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
|
||||
# `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install huggingface_hub
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ci_results_${{ inputs.job }}
|
||||
path: ci_results_${{ inputs.job }}
|
||||
|
2
.github/workflows/ssh-runner.yml
vendored
2
.github/workflows/ssh-runner.yml
vendored
@ -35,7 +35,7 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
if [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
|
||||
echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV
|
||||
echo "RUNNER=aws-g4dn-2xlarge-cache" >> $GITHUB_ENV
|
||||
elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
|
||||
echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
|
||||
elif [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
|
||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -167,6 +167,3 @@ tags
|
||||
|
||||
# ruff
|
||||
.ruff_cache
|
||||
|
||||
# modular conversion
|
||||
*.modular_backup
|
||||
|
39
AGENTS.md
39
AGENTS.md
@ -1,39 +0,0 @@
|
||||
# AGENTS.md Guide for Hugging Face Transformers
|
||||
|
||||
This AGENTS.md file provides guidance for code agents working with this codebase.
|
||||
|
||||
## Core Project Structure
|
||||
|
||||
- `/src/transformers`: This contains the core source code for the library
|
||||
- `/models`: Code for individual models. Models inherit from base classes in the root `/src/transformers` directory.
|
||||
- `/tests`: This contains the core test classes for the library. These are usually inherited rather than directly run.
|
||||
- `/models`: Tests for individual models. Model tests inherit from common tests in the root `/tests` directory.
|
||||
- `/docs`: This contains the documentation for the library, including guides, tutorials, and API references.
|
||||
|
||||
## Coding Conventions for Hugging Face Transformers
|
||||
|
||||
- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
|
||||
- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
|
||||
- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
|
||||
|
||||
## Copying and inheritance
|
||||
|
||||
Many models in the codebase have similar code, but it is not shared by inheritance because we want each model file to be self-contained.
|
||||
We use two mechanisms to keep this code in sync:
|
||||
|
||||
- "Copied from" syntax. Functions or entire classes can have a comment at the top like this: `# Copied from transformers.models.llama.modeling_llama.rotate_half` or `# Copied from transformers.models.t5.modeling_t5.T5LayerNorm with T5->MT5`
|
||||
These comments are actively checked by the style tools, and copies will automatically be updated when the base code is updated. If you need to update a copied function, you should
|
||||
either update the base function and use `make fixup` to propagate the change to all copies, or simply remove the `# Copied from` comment if that is inappropriate.
|
||||
- "Modular" files. These files briefly define models by composing them using inheritance from other models. They are not meant to be used directly. Instead, the style tools
|
||||
automatically generate a complete modeling file, like `modeling_bert.py`, from the modular file like `modular_bert.py`. If a model has a modular file, the modeling file
|
||||
should never be edited directly! Instead, changes should be made in the modular file, and then you should run `make fixup` to update the modeling file automatically.
|
||||
|
||||
When adding new models, you should prefer `modular` style.
|
||||
|
||||
## Testing
|
||||
|
||||
After making changes, you should usually run `make fixup` to ensure any copies and modular files are updated, and then test all affected models. This includes both
|
||||
the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`
|
||||
If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
|
||||
|
||||
In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.
|
@ -78,7 +78,7 @@ Once you've confirmed the bug hasn't already been reported, please include the f
|
||||
To get the OS and software versions automatically, run the following command:
|
||||
|
||||
```bash
|
||||
transformers env
|
||||
transformers-cli env
|
||||
```
|
||||
|
||||
You can also run the same command from the root of the repository:
|
||||
|
@ -26,7 +26,7 @@ There are two main venues to receive support: [the forums](https://discuss.huggi
|
||||
|
||||
[The user forums](https://discuss.huggingface.co/) are supported by the wide community of the library users and backed up by developers when needed.
|
||||
|
||||
If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystallized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).
|
||||
If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystalized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).
|
||||
|
||||
In particular all "Please explain" questions or objectively very user-specific feature requests belong to the forums. Here are some example of such questions:
|
||||
|
||||
|
22
Makefile
22
Makefile
@ -8,19 +8,13 @@ check_dirs := examples tests src utils
|
||||
exclude_folders := ""
|
||||
|
||||
modified_only_fixup:
|
||||
@current_branch=$$(git branch --show-current); \
|
||||
if [ "$$current_branch" = "main" ]; then \
|
||||
echo "On main branch, running 'style' target instead..."; \
|
||||
$(MAKE) style; \
|
||||
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
|
||||
@if test -n "$(modified_py_files)"; then \
|
||||
echo "Checking/fixing $(modified_py_files)"; \
|
||||
ruff check $(modified_py_files) --fix --exclude $(exclude_folders); \
|
||||
ruff format $(modified_py_files) --exclude $(exclude_folders);\
|
||||
else \
|
||||
modified_py_files=$$(python utils/get_modified_files.py $(check_dirs)); \
|
||||
if [ -n "$$modified_py_files" ]; then \
|
||||
echo "Checking/fixing files: $${modified_py_files}"; \
|
||||
ruff check $${modified_py_files} --fix --exclude $(exclude_folders); \
|
||||
ruff format $${modified_py_files} --exclude $(exclude_folders); \
|
||||
else \
|
||||
echo "No library .py files were modified"; \
|
||||
fi; \
|
||||
echo "No library .py files were modified"; \
|
||||
fi
|
||||
|
||||
# Update src/transformers/dependency_versions_table.py
|
||||
@ -46,7 +40,6 @@ repo-consistency:
|
||||
python utils/check_dummies.py
|
||||
python utils/check_repo.py
|
||||
python utils/check_inits.py
|
||||
python utils/check_pipeline_typing.py
|
||||
python utils/check_config_docstrings.py
|
||||
python utils/check_config_attributes.py
|
||||
python utils/check_doctest_list.py
|
||||
@ -86,9 +79,8 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
|
||||
|
||||
fix-copies:
|
||||
python utils/check_copies.py --fix_and_overwrite
|
||||
python utils/check_modular_conversion.py --fix_and_overwrite
|
||||
python utils/check_modular_conversion.py --fix_and_overwrite
|
||||
python utils/check_dummies.py --fix_and_overwrite
|
||||
python utils/check_pipeline_typing.py --fix_and_overwrite
|
||||
python utils/check_doctest_list.py --fix_and_overwrite
|
||||
python utils/check_docstrings.py --fix_and_overwrite
|
||||
|
||||
|
34
README.md
34
README.md
@ -44,7 +44,7 @@ limitations under the License.
|
||||
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
|
||||
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
|
||||
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
|
||||
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
|
||||
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
|
||||
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
|
||||
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
|
||||
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
|
||||
@ -59,28 +59,18 @@ limitations under the License.
|
||||
</h3>
|
||||
|
||||
<h3 align="center">
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/transformers_as_a_model_definition.png"/>
|
||||
<a href="https://hf.co/course"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/course_banner.png"></a>
|
||||
</h3>
|
||||
|
||||
Transformers is a library of pretrained text, computer vision, audio, video, and multimodal models for inference and training. Use Transformers to fine-tune models on your data, build inference applications, and for generative AI use cases across multiple modalities.
|
||||
|
||||
Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer
|
||||
vision, audio, video, and multimodal model, for both inference and training.
|
||||
|
||||
It centralizes the model definition so that this definition is agreed upon across the ecosystem. `transformers` is the
|
||||
pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training
|
||||
frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...),
|
||||
and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from `transformers`.
|
||||
|
||||
We pledge to help support new state-of-the-art models and democratize their usage by having their model definition be
|
||||
simple, customizable, and efficient.
|
||||
|
||||
There are over 1M+ Transformers [model checkpoints](https://huggingface.co/models?library=transformers&sort=trending) on the [Hugging Face Hub](https://huggingface.com/models) you can use.
|
||||
There are over 500K+ Transformers [model checkpoints](https://huggingface.co/models?library=transformers&sort=trending) on the [Hugging Face Hub](https://huggingface.com/models) you can use.
|
||||
|
||||
Explore the [Hub](https://huggingface.com/) today to find a model and use Transformers to help you get started right away.
|
||||
|
||||
## Installation
|
||||
|
||||
Transformers works with Python 3.9+ [PyTorch](https://pytorch.org/get-started/locally/) 2.1+, [TensorFlow](https://www.tensorflow.org/install/pip) 2.6+, and [Flax](https://flax.readthedocs.io/en/latest/) 0.4.1+.
|
||||
Transformers works with Python 3.9+ [PyTorch](https://pytorch.org/get-started/locally/) 2.0+, [TensorFlow](https://www.tensorflow.org/install/pip) 2.6+, and [Flax](https://flax.readthedocs.io/en/latest/) 0.4.1+.
|
||||
|
||||
Create and activate a virtual environment with [venv](https://docs.python.org/3/library/venv.html) or [uv](https://docs.astral.sh/uv/), a fast Rust-based Python package and project manager.
|
||||
|
||||
@ -88,6 +78,7 @@ Create and activate a virtual environment with [venv](https://docs.python.org/3/
|
||||
# venv
|
||||
python -m venv .my-env
|
||||
source .my-env/bin/activate
|
||||
|
||||
# uv
|
||||
uv venv .my-env
|
||||
source .my-env/bin/activate
|
||||
@ -97,10 +88,10 @@ Install Transformers in your virtual environment.
|
||||
|
||||
```py
|
||||
# pip
|
||||
pip install "transformers[torch]"
|
||||
pip install transformers
|
||||
|
||||
# uv
|
||||
uv pip install "transformers[torch]"
|
||||
uv pip install transformers
|
||||
```
|
||||
|
||||
Install Transformers from source if you want the latest changes in the library or are interested in contributing. However, the *latest* version may not be stable. Feel free to open an [issue](https://github.com/huggingface/transformers/issues) if you encounter an error.
|
||||
@ -108,12 +99,7 @@ Install Transformers from source if you want the latest changes in the library o
|
||||
```shell
|
||||
git clone https://github.com/huggingface/transformers.git
|
||||
cd transformers
|
||||
|
||||
# pip
|
||||
pip install .[torch]
|
||||
|
||||
# uv
|
||||
uv pip install .[torch]
|
||||
pip install .
|
||||
```
|
||||
|
||||
## Quickstart
|
||||
@ -135,7 +121,7 @@ To chat with a model, the usage pattern is the same. The only difference is you
|
||||
> [!TIP]
|
||||
> You can also chat with a model directly from the command line.
|
||||
> ```shell
|
||||
> transformers chat Qwen/Qwen2.5-0.5B-Instruct
|
||||
> transformers-cli chat --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct
|
||||
> ```
|
||||
|
||||
```py
|
||||
|
@ -27,6 +27,13 @@ These models require the `trust_remote_code=True` parameter to be set when using
|
||||
the content of the modeling files when using this argument. We recommend setting a revision in order to ensure you
|
||||
protect yourself from updates on the repository.
|
||||
|
||||
#### Tools
|
||||
|
||||
Through the `Agent` framework, remote tools can be downloaded to be used by the Agent. You're to specify these tools
|
||||
yourself, but please keep in mind that their code will be run on your machine if the Agent chooses to run them.
|
||||
|
||||
Please inspect the code of the tools before passing them to the Agent to protect your runtime and local setup.
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software.
|
||||
|
@ -288,7 +288,7 @@ Keywords: Music understanding, Music generation
|
||||
|
||||
## [dalle-flow](https://github.com/jina-ai/dalle-flow)
|
||||
|
||||
DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt.
|
||||
DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. Itt leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt.
|
||||
The preferred candidate is fed to GLID-3 XL for diffusion, which often enriches the texture and background. Finally, the candidate is upscaled to 1024x1024 via SwinIR.
|
||||
|
||||
Keywords: High-definition image generation, Stable Diffusion, DALL-E Mega, GLID-3 XL, CLIP, SwinIR
|
||||
@ -526,7 +526,7 @@ Keywords: Model deployment, CLoud, Mobile, Edge
|
||||
|
||||
## [underthesea](https://github.com/undertheseanlp/underthesea)
|
||||
|
||||
[underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules data sets and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing.
|
||||
[underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules data sets and tutorials supporting research and development in Vietnamese Natural Language Processing. We provides extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing.
|
||||
|
||||
Keywords: Vietnamese, NLP
|
||||
|
||||
|
@ -12,7 +12,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
|
||||
|
||||
## Writing metrics to the database
|
||||
|
||||
`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
|
||||
`MetricRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
|
||||
|
||||
cf [`llama.py`](./llama.py) to see an example of this in practice.
|
||||
|
||||
|
@ -90,7 +90,7 @@ def summarize(run_dir, metrics, expand_metrics=False):
|
||||
|
||||
model = benchmark.config.backend["model"]
|
||||
|
||||
# This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
|
||||
# Ths looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
|
||||
# (we rely on the usage of hydra's `${hydra.job.override_dirname}`.)
|
||||
benchmark_name = re.sub(f"backend.model={model},*", "", report_dir)
|
||||
benchmark_name = str(Path(benchmark_name).parts[-1])
|
||||
|
@ -2,11 +2,12 @@ import argparse
|
||||
import importlib.util
|
||||
import logging
|
||||
import os
|
||||
from typing import Dict
|
||||
import psycopg2
|
||||
import sys
|
||||
from typing import Dict, Tuple
|
||||
|
||||
from psycopg2.extensions import register_adapter
|
||||
from psycopg2.extras import Json
|
||||
from psycopg2.extensions import register_adapter
|
||||
|
||||
|
||||
register_adapter(dict, Json)
|
||||
@ -17,26 +18,23 @@ class ImportModuleException(Exception):
|
||||
|
||||
|
||||
class MetricsRecorder:
|
||||
def __init__(
|
||||
self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str
|
||||
):
|
||||
def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
|
||||
self.conn = connection
|
||||
self.conn.autocommit = True
|
||||
self.logger = logger
|
||||
self.repository = repository
|
||||
self.branch = branch
|
||||
self.commit_id = commit_id
|
||||
self.commit_msg = commit_msg
|
||||
|
||||
def initialise_benchmark(self, metadata: dict[str, str]) -> int:
|
||||
def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
|
||||
"""
|
||||
Creates a new benchmark, returns the benchmark id
|
||||
"""
|
||||
# gpu_name: str, model_id: str
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"INSERT INTO benchmarks (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
|
||||
(self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
|
||||
"INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
|
||||
(self.branch, self.commit_id, self.commit_msg, metadata),
|
||||
)
|
||||
benchmark_id = cur.fetchone()[0]
|
||||
logger.debug(f"initialised benchmark #{benchmark_id}")
|
||||
@ -55,7 +53,7 @@ class MetricsRecorder:
|
||||
f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
|
||||
)
|
||||
|
||||
def collect_model_measurements(self, benchmark_id: int, measurements: dict[str, float]):
|
||||
def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
@ -85,18 +83,12 @@ handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
|
||||
|
||||
def parse_arguments() -> tuple[str, str, str, str]:
|
||||
def parse_arguments():
|
||||
"""
|
||||
Parse command line arguments for the benchmarking CLI.
|
||||
"""
|
||||
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
|
||||
|
||||
parser.add_argument(
|
||||
"repository",
|
||||
type=str,
|
||||
help="The repository name on which the benchmarking is performed.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"branch",
|
||||
type=str,
|
||||
@ -117,7 +109,7 @@ def parse_arguments() -> tuple[str, str, str, str]:
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
return args.repository, args.branch, args.commit_id, args.commit_msg
|
||||
return args.branch, args.commit_id, args.commit_msg
|
||||
|
||||
|
||||
def import_from_path(module_name, file_path):
|
||||
@ -134,7 +126,7 @@ def import_from_path(module_name, file_path):
|
||||
if __name__ == "__main__":
|
||||
benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
repository, branch, commit_id, commit_msg = parse_arguments()
|
||||
branch, commit_id, commit_msg = parse_arguments()
|
||||
|
||||
for entry in os.scandir(benchmarks_folder_path):
|
||||
try:
|
||||
@ -145,7 +137,7 @@ if __name__ == "__main__":
|
||||
logger.debug(f"loading: {entry.name}")
|
||||
module = import_from_path(entry.name.split(".")[0], entry.path)
|
||||
logger.info(f"running benchmarks in: {entry.name}")
|
||||
module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
|
||||
module.run_benchmark(logger, branch, commit_id, commit_msg)
|
||||
except ImportModuleException as e:
|
||||
logger.error(e)
|
||||
except Exception as e:
|
||||
|
@ -1,6 +1,5 @@
|
||||
CREATE TABLE IF NOT EXISTS benchmarks (
|
||||
benchmark_id SERIAL PRIMARY KEY,
|
||||
repository VARCHAR(255),
|
||||
branch VARCHAR(255),
|
||||
commit_id VARCHAR(72),
|
||||
commit_message VARCHAR(70),
|
||||
|
@ -33,15 +33,11 @@ def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
|
||||
sleep(0.01)
|
||||
|
||||
|
||||
def run_benchmark(
|
||||
logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100
|
||||
):
|
||||
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
|
||||
continue_metric_collection = Event()
|
||||
metrics_thread = None
|
||||
model_id = "meta-llama/Llama-2-7b-hf"
|
||||
metrics_recorder = MetricsRecorder(
|
||||
psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg
|
||||
)
|
||||
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
|
||||
try:
|
||||
gpu_stats = gpustat.GPUStatCollection.new_query()
|
||||
gpu_name = gpu_stats[0]["name"]
|
||||
@ -122,7 +118,7 @@ def run_benchmark(
|
||||
with torch.no_grad():
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + num_tokens_to_generate,
|
||||
@ -148,7 +144,7 @@ def run_benchmark(
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + num_tokens_to_generate,
|
||||
@ -191,7 +187,7 @@ def run_benchmark(
|
||||
# TODO use decode_one_token(model, input_id.clone(), cache_position) for verification
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + num_tokens_to_generate + 10,
|
||||
@ -208,7 +204,7 @@ def run_benchmark(
|
||||
time_to_first_token = end - start
|
||||
logger.info(f"completed first compile generation in: {time_to_first_token}s")
|
||||
cache_position += 1
|
||||
all_generated_tokens += next_token.tolist()
|
||||
all_generated_tokens += next_token.clone().detach().cpu().tolist()
|
||||
|
||||
cache_position = torch.tensor([seq_length], device=device)
|
||||
### First compile, decoding
|
||||
@ -219,9 +215,9 @@ def run_benchmark(
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
time_to_second_token = end - start
|
||||
logger.info(f"completed second compile generation in: {time_to_second_token}s")
|
||||
logger.info(f"completed second compile generation in: {time_to_first_token}s")
|
||||
cache_position += 1
|
||||
all_generated_tokens += next_token.tolist()
|
||||
all_generated_tokens += next_token.clone().detach().cpu().tolist()
|
||||
|
||||
### Second compile, decoding
|
||||
start = perf_counter()
|
||||
@ -231,15 +227,15 @@ def run_benchmark(
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
time_to_third_token = end - start
|
||||
logger.info(f"completed third compile forward in: {time_to_third_token}s")
|
||||
logger.info(f"completed third compile forward in: {time_to_first_token}s")
|
||||
cache_position += 1
|
||||
all_generated_tokens += next_token.tolist()
|
||||
all_generated_tokens += next_token.clone().detach().cpu().tolist()
|
||||
|
||||
### Using cuda graphs decoding
|
||||
|
||||
start = perf_counter()
|
||||
for _ in range(1, num_tokens_to_generate):
|
||||
all_generated_tokens += next_token.tolist()
|
||||
all_generated_tokens += next_token.clone().detach().cpu().tolist()
|
||||
next_token = decode_one_token(
|
||||
model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
|
||||
)
|
||||
@ -258,7 +254,7 @@ def run_benchmark(
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
@ -275,7 +271,7 @@ def run_benchmark(
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
@ -291,23 +287,23 @@ def run_benchmark(
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
|
||||
# 3rd call
|
||||
# 3nd call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
third_compile_generate_time = end - start
|
||||
logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
|
||||
logger.info(f"completed second compile generation in: {third_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
@ -317,7 +313,7 @@ def run_benchmark(
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
fourth_compile_generate_time = end - start
|
||||
logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
|
||||
logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
metrics_recorder.collect_model_measurements(
|
||||
|
@ -28,7 +28,6 @@ from transformers.testing_utils import HfDoctestModule, HfDocTestParser
|
||||
|
||||
NOT_DEVICE_TESTS = {
|
||||
"test_tokenization",
|
||||
"test_tokenization_mistral_common",
|
||||
"test_processor",
|
||||
"test_processing",
|
||||
"test_beam_constraints",
|
||||
@ -47,6 +46,10 @@ NOT_DEVICE_TESTS = {
|
||||
"test_keep_in_fp32_modules",
|
||||
"test_gradient_checkpointing_backward_compatibility",
|
||||
"test_gradient_checkpointing_enable_disable",
|
||||
"test_save_load_fast_init_from_base",
|
||||
"test_fast_init_context_manager",
|
||||
"test_fast_init_tied_embeddings",
|
||||
"test_save_load_fast_init_to_base",
|
||||
"test_torch_save_load",
|
||||
"test_initialization",
|
||||
"test_forward_signature",
|
||||
@ -67,6 +70,7 @@ NOT_DEVICE_TESTS = {
|
||||
"ModelTester::test_pipeline_",
|
||||
"/repo_utils/",
|
||||
"/utils/",
|
||||
"/agents/",
|
||||
}
|
||||
|
||||
# allow having multiple repository checkouts and not needing to remember to rerun
|
||||
@ -83,6 +87,7 @@ def pytest_configure(config):
|
||||
config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
|
||||
config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
|
||||
config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
|
||||
config.addinivalue_line("markers", "agent_tests: mark the agent tests that are run on their specific schedule")
|
||||
config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu")
|
||||
|
||||
|
||||
|
@ -2,10 +2,10 @@ FROM python:3.9-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git ffmpeg
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
|
||||
RUN uv pip uninstall transformers
|
||||
|
@ -5,7 +5,7 @@ USER root
|
||||
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir --no-deps timm accelerate
|
||||
RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
|
||||
# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
|
||||
|
@ -2,10 +2,10 @@ FROM python:3.9-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
|
||||
RUN uv pip uninstall transformers
|
||||
|
@ -2,10 +2,10 @@ FROM python:3.9-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs ffmpeg
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]"
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words]"
|
||||
RUN uv pip uninstall transformers
|
||||
|
@ -7,7 +7,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN git lfs install
|
||||
|
||||
RUN uv pip install --no-cache-dir pypi-kenlm
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04
|
||||
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
@ -9,11 +9,11 @@ SHELL ["sh", "-lc"]
|
||||
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
|
||||
# to be used as arguments for docker build (so far).
|
||||
|
||||
ARG PYTORCH='2.7.1'
|
||||
ARG PYTORCH='2.6.0'
|
||||
# (not always a valid torch version)
|
||||
ARG INTEL_TORCH_EXT='2.3.0'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
# Disable kernel mapping for now until all tests pass
|
||||
ENV DISABLE_KERNEL_MAPPING=1
|
||||
ARG CUDA='cu121'
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
|
||||
@ -26,11 +26,11 @@ RUN git clone https://github.com/huggingface/transformers && cd transformers &&
|
||||
# 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future.
|
||||
# 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`.
|
||||
# Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions).
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA && python3 -m pip uninstall -y tensorflow tensorflow_text tensorflow_probability
|
||||
RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 "tensorflow_text<2.16" "tensorflow_probability<0.22" && python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
|
||||
RUN python3 -m pip uninstall -y flax jax
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -U timm
|
||||
RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT -f https://developer.intel.com/ipex-whl-stable-cpu
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
@ -43,7 +43,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
|
||||
|
||||
# For video model testing
|
||||
RUN python3 -m pip install --no-cache-dir av
|
||||
RUN python3 -m pip install --no-cache-dir av==9.2.0
|
||||
|
||||
# Some slow tests require bnb
|
||||
RUN python3 -m pip install --no-cache-dir bitsandbytes
|
||||
@ -57,8 +57,7 @@ RUN python3 -m pip uninstall -y ninja
|
||||
|
||||
# For `dinat` model
|
||||
# The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent)
|
||||
# pin `0.17.4` otherwise `cannot import name 'natten2dav' from 'natten.functional'`
|
||||
RUN python3 -m pip install --no-cache-dir natten==0.17.4+torch250cu121 -f https://shi-labs.com/natten/wheels
|
||||
RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f https://shi-labs.com/natten/wheels
|
||||
|
||||
# For `nougat` tokenizer
|
||||
RUN python3 -m pip install --no-cache-dir python-Levenshtein
|
||||
@ -69,12 +68,6 @@ RUN python3 -m pip install --no-cache-dir g2p-en
|
||||
# For Some bitsandbytes tests
|
||||
RUN python3 -m pip install --no-cache-dir einops
|
||||
|
||||
# For Some tests with `@require_liger_kernel`
|
||||
RUN python3 -m pip install --no-cache-dir liger-kernel
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM rocm/pytorch:rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.7.1
|
||||
FROM rocm/dev-ubuntu-22.04:6.2.4
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
@ -11,6 +11,9 @@ RUN apt update && \
|
||||
RUN git lfs install
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
|
||||
|
||||
RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2.4
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"
|
||||
|
||||
ARG REF=main
|
||||
@ -20,12 +23,8 @@ WORKDIR /
|
||||
ADD https://api.github.com/repos/huggingface/transformers/git/refs/heads/main version.json
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
# On ROCm, torchcodec is required to decode audio files
|
||||
RUN python3 -m pip install --no-cache-dir torchcodec
|
||||
# Install transformers
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video,audio]
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video]
|
||||
|
||||
# Remove tensorflow and flax as they are no longer supported by transformers
|
||||
RUN python3 -m pip uninstall -y tensorflow flax
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
@ -34,6 +33,3 @@ RUN cd transformers && python3 setup.py develop
|
||||
|
||||
# Remove nvml and nvidia-ml-py as it is not compatible with ROCm. apex is not tested on NVIDIA either.
|
||||
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
|
||||
|
||||
# `kernels` may causes many failing tests
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
@ -48,6 +48,3 @@ RUN python3 -c "from deepspeed.launcher.runner import main"
|
||||
|
||||
# Remove nvml as it is not compatible with ROCm
|
||||
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
|
||||
|
||||
# `kernels` may causes many failing tests
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
@ -1,12 +1,12 @@
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
|
||||
FROM nvcr.io/nvidia/pytorch:24.08-py3
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
|
||||
FROM nvcr.io/nvidia/pytorch:23.11-py3
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
ARG PYTORCH='2.7.1'
|
||||
ARG PYTORCH='2.2.0'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
ARG CUDA='cu121'
|
||||
|
||||
RUN apt -y update
|
||||
RUN apt install -y libaio-dev
|
||||
@ -15,13 +15,12 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
|
||||
RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
||||
|
||||
# Install latest release PyTorch
|
||||
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
@ -45,9 +44,6 @@ RUN python3 -m pip uninstall -y deepspeed
|
||||
# TODO: Find out why test fail.
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
@ -1,11 +1,11 @@
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
|
||||
FROM nvcr.io/nvidia/pytorch:24.08-py3
|
||||
FROM nvcr.io/nvidia/pytorch:23.11-py3
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
ARG CUDA='cu121'
|
||||
|
||||
RUN apt -y update
|
||||
RUN apt install -y libaio-dev
|
||||
@ -19,10 +19,9 @@ RUN python3 -m pip uninstall -y torch torchvision torchaudio
|
||||
# Install **nightly** release PyTorch (flag `--pre`)
|
||||
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
|
||||
# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
|
||||
RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
@ -57,9 +56,6 @@ RUN python3 -m pip uninstall -y deepspeed
|
||||
#RUN git clone https://github.com/pytorch/TensorRT.git
|
||||
#RUN cd TensorRT/py && python3 setup.py install --fx-only
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04
|
||||
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
@ -11,28 +11,23 @@ ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
# If set to nothing, will install the latest version
|
||||
ARG PYTORCH='2.7.1'
|
||||
ARG PYTORCH='2.6.0'
|
||||
ARG TORCH_VISION=''
|
||||
ARG TORCH_AUDIO=''
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
ARG CUDA='cu121'
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video]
|
||||
|
||||
# Install torch stuff after ./transformers[dev-torch,testing,video], otherwise torch may be resolved to a previous
|
||||
# version.
|
||||
RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='TORCH_VISION'.*' || VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' || VERSION='torchaudio'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video]
|
||||
|
||||
RUN python3 -m pip uninstall -y tensorflow flax
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
@ -1,93 +0,0 @@
|
||||
FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04 AS base
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
ARG PYTHON_VER=3.11
|
||||
ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get remove -y python3.10 && apt-get autoremove -y
|
||||
RUN apt-get update && \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository -y ppa:deadsnakes/ppa && \
|
||||
apt-get update && \
|
||||
apt-get install -y python$PYTHON_VER python$PYTHON_VER-dev python3-pip && \
|
||||
ln -sf /usr/bin/python$PYTHON_VER /usr/bin/python3 && \
|
||||
ln -sf /usr/bin/python3 /usr/bin/python && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -y install \
|
||||
apt-utils \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
clinfo \
|
||||
curl \
|
||||
git \
|
||||
git-lfs \
|
||||
vim \
|
||||
numactl \
|
||||
gnupg2 \
|
||||
gpg-agent \
|
||||
zlib1g-dev \
|
||||
rsync \
|
||||
sudo \
|
||||
libnl-genl-3-200 \
|
||||
xpu-smi \
|
||||
unzip \
|
||||
ffmpeg \
|
||||
tesseract-ocr \
|
||||
espeak-ng \
|
||||
wget \
|
||||
ncurses-term && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y \
|
||||
linux-headers-$(uname -r) \
|
||||
linux-modules-extra-$(uname -r) \
|
||||
flex bison \
|
||||
intel-fw-gpu intel-i915-dkms xpu-smi \
|
||||
intel-opencl-icd libze-intel-gpu1 libze1 \
|
||||
intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
|
||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||
libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc \
|
||||
libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install triton==3.3.0
|
||||
|
||||
RUN pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/xpu --no-cache-dir
|
||||
|
||||
RUN pip install evaluate torchdata pyctcdecode pytesseract decord galore-torch fire scipy scikit-learn sentencepiece sacremoses nltk rouge_score librosa soundfile g2p_en mpi4py requests_mock
|
||||
RUN pip install pretty_midi essentia resampy Levenshtein av sacrebleu phonemizer invisible_watermark schedulefree
|
||||
RUN pip install gguf hqq compressed_tensors gptqmodel mergekit autoawq deepspeed torchao onnx
|
||||
RUN pip install hf_transfer huggingface-hub hf-doc-builder datasets optimum-quanto timm transformers accelerate optimum peft
|
||||
|
||||
RUN pip install git+https://github.com/linkedin/Liger-Kernel.git --extra-index-url https://download.pytorch.org/whl/test/xpu
|
||||
|
||||
# install bitsandbytes
|
||||
RUN pip install git+https://github.com/bitsandbytes-foundation/bitsandbytes.git
|
||||
|
||||
ENV OCL_ICD_VENDORS=/etc/OpenCL/vendors
|
||||
ENV FI_PROVIDER_PATH=${I_MPI_ROOT}/lib/libfabric/prov:/usr/lib/x86_64-linux-gnu/libfabric
|
||||
ENV CCL_ROOT=/usr/local
|
||||
ENV CCL_ATL_TRANSPORT=ofi
|
||||
ENV I_MPI_ROOT=/usr/local
|
||||
ENV CLASSPATH=${I_MPI_ROOT}/lib/mpi.jar
|
||||
ENV PATH=${I_MPI_ROOT}/bin/libfabric:${PATH}
|
||||
ENV LD_LIBRARY_PATH=${I_MPI_ROOT}/lib/libfabric:${LD_LIBRARY_PATH}
|
||||
|
||||
RUN touch /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
RUN echo "#!/bin/bash" >> /entrypoint.sh
|
||||
RUN echo "source /opt/intel/oneapi/setvars.sh --force && /bin/bash" >> /entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
@ -12,8 +12,6 @@ SHELL ["sh", "-lc"]
|
||||
ARG PYTORCH='2.6.0'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu121'
|
||||
# Disable kernel mapping for quantization tests
|
||||
ENV DISABLE_KERNEL_MAPPING=1
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
|
||||
@ -26,7 +24,7 @@ RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch';
|
||||
RUN echo torch=$VERSION
|
||||
# `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
|
||||
# Currently, let's just use their latest releases (when `torch` is installed with a release version)
|
||||
RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
@ -78,27 +76,15 @@ RUN git clone https://github.com/NetEase-FuXi/EETQ.git && cd EETQ/ && git submod
|
||||
# RUN python3 -m pip install --no-cache-dir flute-kernel==0.4.1
|
||||
# RUN python3 -m pip install --no-cache-dir git+https://github.com/Dao-AILab/fast-hadamard-transform.git
|
||||
|
||||
# Add fp-quant for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir "fp-quant>=0.1.6"
|
||||
|
||||
# Add compressed-tensors for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir compressed-tensors
|
||||
|
||||
# Add AMD Quark for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir amd-quark
|
||||
|
||||
# Add AutoRound for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir "auto-round>=0.5.0"
|
||||
|
||||
# Add transformers in editable mode
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# Uninstall flash-attn installed by autoawq, it causes issues here : https://github.com/huggingface/transformers/actions/runs/15915442841/job/44892146131
|
||||
RUN python3 -m pip uninstall -y flash-attn
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
@ -278,7 +278,7 @@ Here's an example of a single value return:
|
||||
|
||||
```python
|
||||
Returns:
|
||||
`list[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token.
|
||||
`List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token.
|
||||
```
|
||||
|
||||
Here's an example of a tuple return, comprising several objects:
|
||||
|
@ -23,6 +23,8 @@
|
||||
title: تحميل النماذج المخصصة وتدريبها باستخدام 🤗 PEFT
|
||||
- local: model_sharing
|
||||
title: مشاركة نموذجك
|
||||
- local: agents
|
||||
title: الوكلاء
|
||||
- local: llm_tutorial
|
||||
title: التوليد باستخدام LLMs
|
||||
- local: conversations
|
||||
@ -250,6 +252,8 @@
|
||||
title: أطر مفاهيمية
|
||||
# - sections:
|
||||
# - sections:
|
||||
# - local: main_classes/agent
|
||||
# title: الوكلاء والأدوات
|
||||
# - local: model_doc/auto
|
||||
# title: فئات يتم إنشاؤها ديناميكيًا
|
||||
# - local: main_classes/backbones
|
||||
|
539
docs/source/ar/agents.md
Normal file
539
docs/source/ar/agents.md
Normal file
@ -0,0 +1,539 @@
|
||||
# الوكلاء والأدوات
|
||||
|
||||
[[open-in-colab]]
|
||||
|
||||
### ما هو الوكيل؟
|
||||
|
||||
يمكن للنظم اللغوية الكبيرة (LLMs) التي تم تدريبها على أداء [نمذجة اللغة السببية](./tasks/language_modeling.) التعامل مع مجموعة واسعة من المهام، ولكنها غالبًا ما تواجه صعوبات في المهام الأساسية مثل المنطق والحساب والبحث. وعندما يتم استدعاؤها في مجالات لا تؤدي فيها أداءً جيدًا، فإنها غالبًا ما تفشل في توليد الإجابة التي نتوقعها منها.
|
||||
|
||||
يتمثل أحد النهج للتغلب على هذا القصور في إنشاء "وكيل".
|
||||
|
||||
الوكيل هو نظام يستخدم LLM كمحرك له، ولديه حق الوصول إلى وظائف تسمى "أدوات".
|
||||
|
||||
هذه "الأدوات" هي وظائف لأداء مهمة، وتحتوي على جميع الأوصاف اللازمة للوكيل لاستخدامها بشكل صحيح.
|
||||
|
||||
يمكن برمجة الوكيل للقيام بما يلي:
|
||||
- وضع سلسلة من الإجراءات/الأدوات وتشغيلها جميعًا في نفس الوقت مثل [`CodeAgent`] على سبيل المثال
|
||||
- التخطيط للاجراءات/الأدوات وتنفيذها واحدة تلو الأخرى والانتظار حتى انتهاء كل إجراء قبل إطلاق التالي مثل [`ReactJsonAgent`] على سبيل المثال
|
||||
|
||||
### أنواع الوكلاء
|
||||
|
||||
#### الوكيل البرمجي (Code agent)
|
||||
|
||||
يتمتع هذا الوكيل يتبع خطوات محددة: أولًا، يخطط لسلسلة من الإجراءات التي يريد تنفيذها، ثم شفرة Python لتنفيذ جميع الإجراءات في نفس الوقت. وهو يتعامل بشكل أصلي مع أنواع مختلفة من المدخلات والمخرجات للأدوات التي يستخدمها، وبالتالي فهو الخيار الموصى به للمهام متعددة الوسائط.
|
||||
|
||||
#### وكلاء التفاعل
|
||||
|
||||
هذا هو الوكيل الذي يتم اللجوء إليه لحل مهام الاستدلال، حيث يجعل إطار ReAct ([Yao et al.، 2022](https://huggingface.co/papers/2210.03629)) من الكفاءة حقًا التفكير على أساس ملاحظاته السابقة.
|
||||
|
||||
نقوم بتنفيذ إصدارين من ReactJsonAgent:
|
||||
- [`ReactJsonAgent`] يقوم بتوليد استدعاءات الأدوات كـ JSON في إخراجها.
|
||||
- [`ReactCodeAgent`] هو نوع جديد من ReactJsonAgent يقوم بتوليد استدعاءات أدواته كمقاطع من التعليمات البرمجية، والتي تعمل بشكل جيد حقًا مع LLMs التي تتمتع بأداء قوي في البرمجة.
|
||||
|
||||
> [!TIP]
|
||||
> اقرأ منشور المدونة [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) لمعرفة المزيد عن وكيل ReAct.
|
||||
|
||||

|
||||
|
||||
على سبيل المثال، إليك كيف يعمل وكيل ReAct Code طريقه من خلال السؤال التالي.
|
||||
|
||||
```py3
|
||||
>>> agent.run(
|
||||
... "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?",
|
||||
... )
|
||||
=====New task=====
|
||||
How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?
|
||||
====Agent is executing the code below:
|
||||
bert_blocks = search(query="number of blocks in BERT base encoder")
|
||||
print("BERT blocks:", bert_blocks)
|
||||
====
|
||||
Print outputs:
|
||||
BERT blocks: twelve encoder blocks
|
||||
|
||||
====Agent is executing the code below:
|
||||
attention_layer = search(query="number of layers in Attention is All You Need")
|
||||
print("Attention layers:", attention_layer)
|
||||
====
|
||||
Print outputs:
|
||||
Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture.
|
||||
|
||||
====Agent is executing the code below:
|
||||
bert_blocks = 12
|
||||
attention_layers = 6
|
||||
diff = bert_blocks - attention_layers
|
||||
print("Difference in blocks:", diff)
|
||||
final_answer(diff)
|
||||
====
|
||||
|
||||
Print outputs:
|
||||
Difference in blocks: 6
|
||||
|
||||
Final answer: 6
|
||||
```
|
||||
|
||||
### كيف يمكنني بناء وكيل؟
|
||||
|
||||
لتهيئة وكيل، تحتاج إلى هذه الوسائط:
|
||||
|
||||
- نموذج لغوي كبير (LLM) يشكل المحرك الأساسي للوكيل. الوكيل نفسه ليس النموذج اللغوي، بل هو برنامج يستخدم النموذج اللغوي كمحرك له.
|
||||
- موجه النظام (system prompt): هذه هي التعليمات التي يتم إعطاؤها للنموذج اللغوي لإنشاء مخرجاته.
|
||||
- صندوق أدوات (toolbox) يختار الوكيل منه الأدوات لتنفيذها
|
||||
- محلل (parser) لاستخراج الأدوات التي يجب استدعاؤها من مخرجات النموذج اللغوي LLM والأدوات التي يجب استخدامها
|
||||
|
||||
عند تهيئة نظام الوكيل، يتم استخدام سمات الأداة لإنشاء وصف للأداة، ثم يتم دمجها في موجه النظام الخاص `system_prompt` للوكيل لإعلامه بالأدوات التي يمكنه استخدامها ولماذا.
|
||||
|
||||
للبدء، يرجى تثبيت `agents` الإضافية لتثبيت جميع التبعيات الافتراضية.
|
||||
|
||||
```bash
|
||||
pip install transformers[agents]
|
||||
```
|
||||
|
||||
قم ببناء محرك LLM الخاص بك من خلال تعريف طريقة `llm_engine` التي تقبل قائمة من [الرسائل](./chat_templating.) وتعيد النص. يجب أن تقبل هذه الدالة القابلة للاستدعاء أيضًا معامل `stop` يشير إلى متى يجب التوقف عن التوليد.
|
||||
|
||||
```python
|
||||
from huggingface_hub import login, InferenceClient
|
||||
|
||||
login("<YOUR_HUGGINGFACEHUB_API_TOKEN>")
|
||||
|
||||
client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct")
|
||||
|
||||
def llm_engine(messages, stop_sequences=["Task"]) -> str:
|
||||
response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
|
||||
answer = response.choices[0].message.content
|
||||
return answer
|
||||
```
|
||||
|
||||
يمكنك استخدام أي طريقة `llm_engine` طالما أنها:
|
||||
1. يتبع تنسيق [رسائل](./chat_templating.md) لإدخاله (`List [Dict [str، str]]`) ويعيد `str`
|
||||
2. يتوقف عن توليد المخراجات من التسلسلات التي تم تمريرها في معامل `stop`
|
||||
|
||||
أنت بحاجة أيضًا إلى معامل "الأدوات" الذي يقبل قائمة من "الأدوات". يمكنك توفير قائمة فارغة لـ "الأدوات"، ولكن استخدم صندوق الأدوات الافتراضي مع معامل اختياري `add_base_tools=True`.
|
||||
|
||||
الآن يمكنك إنشاء وكيل، مثل [`CodeAgent`], وتشغيله. ولتسهيل الأمر، نقدم أيضًا فئة [`HfEngine`] التي تستخدم `huggingface_hub.InferenceClient` بشكل مخفى.
|
||||
|
||||
```python
|
||||
from transformers import CodeAgent, HfEngine
|
||||
|
||||
llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
|
||||
agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
|
||||
|
||||
agent.run(
|
||||
"Could you translate this sentence from French, say it out loud and return the audio.",
|
||||
sentence="Où est la boulangerie la plus proche?",
|
||||
)
|
||||
```
|
||||
|
||||
هذه الميزة ستكون مفيدة في حالة الحاجة الملحة! يمكنك حتى ترك معامل `llm_engine` غير محدد، وسيتم إنشاء [`HfEngine`] بشكل تلقائي.
|
||||
|
||||
```python
|
||||
from transformers import CodeAgent
|
||||
|
||||
agent = CodeAgent(tools=[], add_base_tools=True)
|
||||
|
||||
agent.run(
|
||||
"Could you translate this sentence from French, say it out loud and give me the audio.",
|
||||
sentence="Où est la boulangerie la plus proche?",
|
||||
)
|
||||
```
|
||||
|
||||
لاحظ أننا استخدمنا معامل "sentence" إضافي: يمكنك تمرير النص كمعامل إضافي إلى النموذج.
|
||||
|
||||
يمكنك أيضًا استخدام هذا للإشارة إلى مسار الملفات المحلية أو البعيدة للنموذج لاستخدامها:
|
||||
|
||||
```py
|
||||
from transformers import ReactCodeAgent
|
||||
|
||||
agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
|
||||
|
||||
agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3")
|
||||
```
|
||||
|
||||
|
||||
تم تحديد موجه النظام ومحلل المخرجات تلقائيًا، ولكن يمكنك فحصهما بسهولة عن طريق استدعاء `system_prompt_template` على وكيلك.
|
||||
|
||||
```python
|
||||
print(agent.system_prompt_template)
|
||||
```
|
||||
|
||||
من المهم أن تشرح بأكبر قدر ممكن من الوضوح المهمة التي تريد تنفيذها.
|
||||
كل عملية [`~Agent.run`] مستقلة، وبما أن الوكيل مدعوم من LLM، فقد تؤدي الاختلافات الطفيفة في موجهك إلى نتائج مختلفة تمامًا.
|
||||
يمكنك أيضًا تشغيل وكيل بشكل متتالي لمهام مختلفة: في كل مرة يتم فيها إعادة تهيئة سمتي `agent.task` و`agent.logs`.
|
||||
|
||||
|
||||
#### تنفيذ التعليمات البرمجية
|
||||
|
||||
يقوم مفسر Python بتنفيذ التعليمات البرمجية على مجموعة من المدخلات التي يتم تمريرها جنبًا إلى جنب مع أدواتك.
|
||||
يجب أن يكون هذا الأمر آمنًا لأن الوظائف الوحيدة التي يمكن استدعاؤها هي الأدوات التي قدمتها (خاصة إذا كانت أدوات من Hugging Face فقط) ووظيفة الطباعة، لذا فأنت مقيد بالفعل بما يمكن تنفيذه.
|
||||
|
||||
مفسر Python لا يسمح أيضًا باستدعاء دوال بشكل افتراضي خارج قائمة آمنة، لذا فإن جميع الهجمات الأكثر وضوحًا لا ينبغي أن تكون مشكلة.
|
||||
يمكنك أيضًا الإذن باستيرادات إضافية عن طريق تمرير الوحدات النمطية المصرح بها كقائمة من السلاسل في معامل `additional_authorized_imports` عند تهيئة [`ReactCodeAgent`] أو [`CodeAgent`]:
|
||||
|
||||
```py
|
||||
>>> from transformers import ReactCodeAgent
|
||||
|
||||
>>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
|
||||
>>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
|
||||
|
||||
(...)
|
||||
'Hugging Face – Blog'
|
||||
```
|
||||
|
||||
سيتم إيقاف التنفيذ عند أي رمز يحاول تنفيذ عملية غير قانونية أو إذا كان هناك خطأ Python عادي في التعليمات البرمجية التي تم إنشاؤها بواسطة الوكيل.
|
||||
|
||||
> [!WARNING]
|
||||
> يمكن لـ LLM توليد شفرة برمجية عشوائية سيتم تنفيذها بعد ذلك: لا تقمب استدعاء أى دوال غير آمنة!
|
||||
|
||||
### موجه النظام
|
||||
|
||||
ينشئ الوكيل، أو بالأحرى LLM الذي يقود الوكيل، يولد مخرجات بناءً على موجه النظام. يمكن تخصيص موجه النظام وتصميمه للمهام المقصودة. على سبيل المثال، تحقق من موجه النظام لـ [`ReactCodeAgent`] (الإصدار أدناه مبسط قليلاً).
|
||||
|
||||
```text
|
||||
You will be given a task to solve as best you can.
|
||||
You have access to the following tools:
|
||||
<<tool_descriptions>>
|
||||
|
||||
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
|
||||
|
||||
At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use.
|
||||
Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '/End code' sequence.
|
||||
During each intermediate step, you can use 'print()' to save whatever important information you will then need.
|
||||
These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step.
|
||||
|
||||
In the end you have to return a final answer using the `final_answer` tool.
|
||||
|
||||
Here are a few examples using notional tools:
|
||||
---
|
||||
{examples}
|
||||
|
||||
Above example were using notional tools that might not exist for you. You only have access to those tools:
|
||||
<<tool_names>>
|
||||
You also can perform computations in the python code you generate.
|
||||
|
||||
Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```<end_code>' sequence. You MUST provide at least the 'Code:' sequence to move forward.
|
||||
|
||||
Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks.
|
||||
Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result.
|
||||
|
||||
Remember to make sure that variables you use are all defined.
|
||||
|
||||
Now Begin!
|
||||
```
|
||||
|
||||
يتضمن موجه النظام:
|
||||
- *مقدمة* تشرح كيف يجب أن يتصرف الوكيل والأدوات التي يجب عليه استخدامها.
|
||||
- وصف لجميع الأدوات التي يتم تحديدها بواسطة رمز `<<tool_descriptions>>` الذي يتم استبداله ديناميكيًا في وقت التشغيل بالأدوات التي يحددها المستخدم أو يختارها.
|
||||
- يأتي وصف الأداة من سمات الأداة، `name`، و`description`، و`inputs` و`output_type`، وقالب `jinja2` بسيط يمكنك تحسينه.
|
||||
- شكل المخرج المتوقع.
|
||||
|
||||
يمكنك تحسين موجه النظام، على سبيل المثال، عن طريق إضافة شرح لتنسيق المخرجات.
|
||||
|
||||
للحصول على أقصى قدر من المرونة، يمكنك الكتابة فوق قالب موجه النظام بالكامل عن طريق تمرير موجه مخصص كمعامل إلى معلمة `system_prompt`.
|
||||
|
||||
```python
|
||||
from transformers import ReactJsonAgent
|
||||
from transformers.agents import PythonInterpreterTool
|
||||
|
||||
agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
> يرجى التأكد من تحديد سلسلة `<<tool_descriptions>>` في مكان ما في `template` حتى يكون الوكيل على علم
|
||||
بالأدوات المتاحة.
|
||||
|
||||
|
||||
### فحص تشغيل الوكيل
|
||||
|
||||
فيما يلي بعض السمات المفيدة لفحص ما حدث بعد التشغيل:
|
||||
- تخزن `agent.logs` سجلات مفصلة للوكيل. في كل خطوة من تشغيل الوكيل، يتم تخزين كل شيء في قاموس إلحاقه بـ `agent.logs`.
|
||||
- تشغيل `agent.write_inner_memory_from_logs()` يخلق ذاكرة داخلية لسجلات الوكيل للنظام LLM لعرضها، كقائمة من رسائل الدردشة. تنتقل هذه الطريقة عبر كل خطوة من سجل الوكيل ولا تخزن سوى ما يهمها كرسالة: على سبيل المثال، سيحفظ موجه النظام والمهمة في رسائل منفصلة، ثم لكل خطوة سيخزن مخرج LLM كرسالة، ومخرج استدعاء الأداة كرسالة أخرى. استخدم هذا إذا كنت تريد عرضًا عامًا لما حدث - ولكن لن يتم نسخ كل سجل بواسطة هذه الطريقة.
|
||||
|
||||
## الأدوات
|
||||
|
||||
الأداة هي عبارة عن وظيفة أساسية يستخدمها الوكيل لتنفيذ مهمة محددة.
|
||||
|
||||
يمكنك على سبيل المثال التحقق من [`PythonInterpreterTool`]: لديه اسم ووصف ووصف للمدخلات ونوع للمخرج، وطريقة `__call__` التي تقوم بتنفيذ المهمة المطلوبة.
|
||||
|
||||
عند تهيئة الوكيل، يتم استخدام سمات الأداة لتوليد وصف للأداة يتم تضمينه في موجه النظام الخاص بالوكيل. يتيح هذا للوكيل معرفة الأدوات التي يمكنه استخدامها ولماذا.
|
||||
|
||||
### صندوق الأدوات الافتراضي
|
||||
|
||||
يأتي Transformers مع صندوق أدوات افتراضي لتمكين الوكلاء، والذي يمكنك إضافته إلى وكيلك عند التهيئة باستخدام معامل `add_base_tools = True`:
|
||||
|
||||
- **الإجابة على أسئلة المستند**: الإجابة على سؤال حول المستند (مثل ملف PDF) بتنسيق صورة ([Donut](./model_doc/donut))
|
||||
- **الإجابة على أسئلة الصور**: الإجابة على سؤال حول صورة ([VILT](./model_doc/vilt))
|
||||
- **التحدث إلى النص**: قم بتفريغ الكلام إلى نص ([Whisper](./model_doc/whisper))
|
||||
- **النص إلى كلام**: تحويل النص إلى كلام ([SpeechT5](./model_doc/speecht5))
|
||||
- **الترجمة**: ترجمة جملة معينة من لغة المصدر إلى لغة الهدف.
|
||||
- **مفسر كود Python**: تشغيل كود Python الذي تم إنشاؤه بواسطة LLM في بيئة آمنة. لن يتم إضافة هذه الأداة إلى [`ReactJsonAgent`] إلا إذا استخدمت `add_base_tools=True`، نظرًا لأن الأدوات المستندة إلى التعليمات البرمجية يمكنها بالفعل تنفيذ كود Python
|
||||
لا تترجم النصوص الخاصة ولا الأكواد البرمجية ولا الروابط ولا رموز HTML وCSS:
|
||||
|
||||
يمكنك استخدام أداة يدويًا عن طريق استدعاء دالة [`load_tool`] وتحديد مهمة لتنفيذها.
|
||||
|
||||
```python
|
||||
from transformers import load_tool
|
||||
|
||||
tool = load_tool("text-to-speech")
|
||||
audio = tool("This is a text to speech tool")
|
||||
```
|
||||
|
||||
### إنشاء أداة جديدة
|
||||
|
||||
يمكنك إنشاء أداتك الخاصة لتغطية حالات الاستخدام التي لا تغطيها الأدوات الافتراضية من Hugging Face.
|
||||
على سبيل المثال، دعنا نقوم بإنشاء أداة تعرض النموذج الأكثر تنزيلًا لمهمة معينة من Hub.
|
||||
|
||||
سوف نبدأ بالكود التالي.
|
||||
|
||||
```python
|
||||
from huggingface_hub import list_models
|
||||
|
||||
task = "text-classification"
|
||||
|
||||
model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
|
||||
print(model.id)
|
||||
```
|
||||
|
||||
يمكن تحويل هذه الشيفرة إلى فئة ترث من الفئة العليا [`Tool`].
|
||||
|
||||
تحتاج الأداة المخصصة إلى:
|
||||
|
||||
- اسم `name`، والتي تمثل اسم الأداة نفسها. عادةً ما يصف الاسم وظيفتها. بما أن الكود يعيد النموذج الأكثر تنزيلًا لمهمة ما، فلنسمها `model_download_counter`.
|
||||
- تستخدم خاصية `description` لملء موجه نظام الوكيل.
|
||||
- خاصية `inputs`، والتي هي عبارة عن قاموس بمفاتيح "type" و"description". يحتوي على معلومات تساعد المفسر Python على اتخاذ خيارات مستنيرة بشأن المدخلات.
|
||||
- خاصية `output_type`، والتي تحدد نوع المخرج.
|
||||
- طريقة `forward` والتي تحتوي على الكود الذي سيتم تنفيذه للحصول على النتيجة النهائية.
|
||||
|
||||
```python
|
||||
from transformers import Tool
|
||||
from huggingface_hub import list_models
|
||||
|
||||
class HFModelDownloadsTool(Tool):
|
||||
name = "model_download_counter"
|
||||
description = (
|
||||
"This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. "
|
||||
"It returns the name of the checkpoint."
|
||||
)
|
||||
|
||||
inputs = {
|
||||
"task": {
|
||||
"type": "text",
|
||||
"description": "the task category (such as text-classification, depth-estimation, etc)",
|
||||
}
|
||||
}
|
||||
output_type = "text"
|
||||
|
||||
def forward(self, task: str):
|
||||
model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
|
||||
return model.id
|
||||
```
|
||||
|
||||
الآن بعد أن أصبحت فئة `HfModelDownloadsTool` المخصصة جاهزة، يمكنك حفظها في ملف باسم `model_downloads.py` واستيرادها للاستخدام.
|
||||
|
||||
```python
|
||||
from model_downloads import HFModelDownloadsTool
|
||||
|
||||
tool = HFModelDownloadsTool()
|
||||
```
|
||||
|
||||
يمكنك أيضًا مشاركة أداتك المخصصة في Hub عن طريق استدعاء [`~Tool.push_to_hub`] على الأداة. تأكد من أنك قمت بإنشاء مستودع لها على Hub وأنك تستخدم رمز وصول للقراءة.
|
||||
|
||||
```python
|
||||
tool.push_to_hub("{your_username}/hf-model-downloads")
|
||||
```
|
||||
|
||||
قم بتحميل الأداة باستخدام دالة [`~Tool.load_tool`] ومررها إلى معلمة `tools` في الوكيل الخاص بك.
|
||||
|
||||
```python
|
||||
from transformers import load_tool, CodeAgent
|
||||
|
||||
model_download_tool = load_tool("m-ric/hf-model-downloads")
|
||||
agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine)
|
||||
agent.run(
|
||||
"Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
|
||||
)
|
||||
```
|
||||
|
||||
ستحصل على ما يلي:
|
||||
|
||||
```text
|
||||
======== New task ========
|
||||
Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?
|
||||
==== Agent is executing the code below:
|
||||
most_downloaded_model = model_download_counter(task="text-to-video")
|
||||
print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.")
|
||||
====
|
||||
```
|
||||
|
||||
والناتج:
|
||||
|
||||
`"النموذج الأكثر تنزيلًا لمهمة `text-to-video` هو ByteDance/AnimateDiff-Lightning."`
|
||||
|
||||
### إدارة صندوق أدوات الوكيل الخاص بك
|
||||
|
||||
إذا كنت قد قمت بتهيئة وكيل، فمن غير الملائم إعادة تهيئته من البداية لإضافة أداة جديدة ترغب في استخدامها. باستخدام مكتبة Transformers، يمكنك إدارة صندوق أدوات الوكيل بإضافة أو استبدال أداة موجودة.
|
||||
|
||||
دعنا نضيف الأداة `model_download_tool` إلى وكيل تم تهيئته مسبقًا باستخدام صندوق الأدوات الافتراضي.
|
||||
|
||||
```python
|
||||
from transformers import CodeAgent
|
||||
|
||||
agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
|
||||
agent.toolbox.add_tool(model_download_tool)
|
||||
```
|
||||
|
||||
الآن يمكننا الاستفادة من الأداة الجديدة وأداة تحويل النص إلى كلام السابقة:
|
||||
|
||||
```python
|
||||
agent.run(
|
||||
"Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?"
|
||||
)
|
||||
```
|
||||
|
||||
| **Audio** |
|
||||
|------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| <audio controls><source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/damo.wav" type="audio/wav"/> |
|
||||
|
||||
> [!WARNING]
|
||||
> احترس عند إضافة أدوات إلى وكيل يعمل بالفعل لأنه يمكن أن يؤثر على اختيار الأداة لصالح أداتك أو اختيار أداة أخرى غير المحددة بالفعل.
|
||||
|
||||
استخدم طريقة `agent.toolbox.update_tool()` لاستبدال أداة موجودة في صندوق أدوات الوكيل.
|
||||
هذا مفيد إذا كانت أداتك الجديدة بديلاً مباشرًا للأداة الموجودة لأن الوكيل يعرف بالفعل كيفية تنفيذ تلك المهمة المحددة.
|
||||
تأكد فقط من اتباع الأداة الجديدة لنفس واجهة برمجة التطبيقات (API) للأداة المستبدلة أو قم بتكييف قالب موجه النظام لضمان تحديث جميع الأمثلة التي تستخدم الأداة المستبدلة.
|
||||
|
||||
### استخدام مجموعة من الأدوات
|
||||
|
||||
يمكنك الاستفادة من مجموعات الأدوات باستخدام كائن ToolCollection، مع تحديد مجموعة الأدوات التي تريد استخدامها.
|
||||
ثم قم بتمريرها كقائمة لتهيئة الوكيل الخاص بك، وبدء استخدامها!
|
||||
|
||||
```py
|
||||
from transformers import ToolCollection, ReactCodeAgent
|
||||
|
||||
image_tool_collection = ToolCollection(collection_slug="huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f")
|
||||
agent = ReactCodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True)
|
||||
|
||||
agent.run("Please draw me a picture of rivers and lakes.")
|
||||
```
|
||||
|
||||
لتسريع البداية، يتم تحميل الأدوات فقط إذا استدعاها الوكيل.
|
||||
|
||||
ستحصل على هذه الصورة:
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png" />
|
||||
|
||||
### استخدام gradio-tools
|
||||
|
||||
[gradio-tools](https://github.com/freddyaboulton/gradio-tools) هي مكتبة قوية تتيح استخدام Hugging
|
||||
Face Spaces كأدوات. تدعم العديد من المساحات الموجودة بالإضافة إلى مساحات مخصصة.
|
||||
|
||||
تدعم مكتبة Transformers `gradio_tools` باستخدام طريقة [`Tool.from_gradio`] في الفئة. على سبيل المثال، دعنا نستخدم [`StableDiffusionPromptGeneratorTool`](https://github.com/freddyaboulton/gradio-tools/blob/main/gradio_tools/tools/prompt_generator.py) من مجموعة أدوات `gradio-tools` لتحسين المطالبات لإنشاء صور أفضل.
|
||||
|
||||
استورد وقم بتهيئة الأداة، ثم مررها إلى طريقة `Tool.from_gradio`:
|
||||
|
||||
```python
|
||||
from gradio_tools import StableDiffusionPromptGeneratorTool
|
||||
from transformers import Tool, load_tool, CodeAgent
|
||||
|
||||
gradio_prompt_generator_tool = StableDiffusionPromptGeneratorTool()
|
||||
prompt_generator_tool = Tool.from_gradio(gradio_prompt_generator_tool)
|
||||
```
|
||||
|
||||
الآن يمكنك استخدامه مثل أي أداة أخرى. على سبيل المثال، دعنا نحسن الموجه `a rabbit wearing a space suit`.
|
||||
|
||||
```python
|
||||
image_generation_tool = load_tool('huggingface-tools/text-to-image')
|
||||
agent = CodeAgent(tools=[prompt_generator_tool, image_generation_tool], llm_engine=llm_engine)
|
||||
|
||||
agent.run(
|
||||
"Improve this prompt, then generate an image of it.", prompt='A rabbit wearing a space suit'
|
||||
)
|
||||
```
|
||||
|
||||
يستفيد النموذج بشكل كافٍ من الأداة:
|
||||
|
||||
```text
|
||||
======== New task ========
|
||||
Improve this prompt, then generate an image of it.
|
||||
You have been provided with these initial arguments: {'prompt': 'A rabbit wearing a space suit'}.
|
||||
==== Agent is executing the code below:
|
||||
improved_prompt = StableDiffusionPromptGenerator(query=prompt)
|
||||
while improved_prompt == "QUEUE_FULL":
|
||||
improved_prompt = StableDiffusionPromptGenerator(query=prompt)
|
||||
print(f"The improved prompt is {improved_prompt}.")
|
||||
image = image_generator(prompt=improved_prompt)
|
||||
====
|
||||
```
|
||||
|
||||
قبل إنشاء الصورة أخيرًا:
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit_spacesuit_flux.webp" />
|
||||
|
||||
> [!WARNING]
|
||||
> تتطلب gradio-tools إدخالات وإخراجات *نصية* حتى عند العمل مع طرائق مختلفة مثل كائنات الصور والصوت. الإدخالات والإخراجات الصورية والصوتية غير متوافقة حاليًا.
|
||||
|
||||
### استخدام أدوات LangChain
|
||||
|
||||
نحن نحب Langchain ونعتقد أنها تحتوي على مجموعة أدوات قوية للغاية.
|
||||
لاستيراد أداة من LangChain، استخدم الطريقة `from_langchain()`.
|
||||
|
||||
فيما يلي كيفية استخدامها لإعادة إنشاء نتيجة البحث في المقدمة باستخدام أداة بحث الويب LangChain.
|
||||
|
||||
```python
|
||||
from langchain.agents import load_tools
|
||||
from transformers import Tool, ReactCodeAgent
|
||||
|
||||
search_tool = Tool.from_langchain(load_tools(["serpapi"])[0])
|
||||
|
||||
agent = ReactCodeAgent(tools=[search_tool])
|
||||
|
||||
agent.run("How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?")
|
||||
```
|
||||
|
||||
## واجهة Gradio
|
||||
|
||||
يمكنك الاستفادة من `gradio.Chatbot` لعرض أفكار الوكيل الخاص بك باستخدام `stream_to_gradio`، إليك مثال:
|
||||
|
||||
```py
|
||||
import gradio as gr
|
||||
from transformers import (
|
||||
load_tool,
|
||||
ReactCodeAgent,
|
||||
HfEngine,
|
||||
stream_to_gradio,
|
||||
)
|
||||
|
||||
# Import tool from Hub
|
||||
image_generation_tool = load_tool("m-ric/text-to-image")
|
||||
|
||||
llm_engine = HfEngine("meta-llama/Meta-Llama-3-70B-Instruct")
|
||||
|
||||
# Initialize the agent with the image generation tool
|
||||
agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
|
||||
|
||||
|
||||
def interact_with_agent(task):
|
||||
messages = []
|
||||
messages.append(gr.ChatMessage(role="user", content=task))
|
||||
yield messages
|
||||
for msg in stream_to_gradio(agent, task):
|
||||
messages.append(msg)
|
||||
yield messages + [
|
||||
gr.ChatMessage(role="assistant", content="⏳ Task not finished yet!")
|
||||
]
|
||||
yield messages
|
||||
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
text_input = gr.Textbox(lines=1, label="Chat Message", value="Make me a picture of the Statue of Liberty.")
|
||||
submit = gr.Button("Run illustrator agent!")
|
||||
chatbot = gr.Chatbot(
|
||||
label="Agent",
|
||||
type="messages",
|
||||
avatar_images=(
|
||||
None,
|
||||
"https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
|
||||
),
|
||||
)
|
||||
submit.click(interact_with_agent, [text_input], [chatbot])
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo.launch()
|
||||
```
|
@ -3,16 +3,16 @@
|
||||
يُشهد في الآونة الأخيرة نمو مجال دراسي يُعنى باستكشاف آلية عمل نماذج المحولات الضخمة مثل BERT (والذي يُطلق عليها البعض اسم "BERTology"). ومن الأمثلة البارزة على هذا المجال ما يلي:
|
||||
|
||||
- BERT Rediscovers the Classical NLP Pipeline بواسطة Ian Tenney و Dipanjan Das و Ellie Pavlick:
|
||||
https://huggingface.co/papers/1905.05950
|
||||
- Are Sixteen Heads Really Better than One? بواسطة Paul Michel و Omer Levy و Graham Neubig: https://huggingface.co/papers/1905.10650
|
||||
https://arxiv.org/abs/1905.05950
|
||||
- Are Sixteen Heads Really Better than One? بواسطة Paul Michel و Omer Levy و Graham Neubig: https://arxiv.org/abs/1905.10650
|
||||
- What Does BERT Look At? An Analysis of BERT's Attention بواسطة Kevin Clark و Urvashi Khandelwal و Omer Levy و Christopher D.
|
||||
Manning: https://huggingface.co/papers/1906.04341
|
||||
- CAT-probing: A Metric-based Approach to Interpret How Pre-trained Models for Programming Language Attend Code Structure: https://huggingface.co/papers/2210.04633
|
||||
Manning: https://arxiv.org/abs/1906.04341
|
||||
- CAT-probing: A Metric-based Approach to Interpret How Pre-trained Models for Programming Language Attend Code Structure: https://arxiv.org/abs/2210.04633
|
||||
|
||||
لإثراء هذا المجال الناشئ، قمنا بتضمين بعض الميزات الإضافية في نماذج BERT/GPT/GPT-2 للسماح للناس بالوصول إلى التمثيلات الداخلية، والتي تم تكييفها بشكل أساسي من العمل الرائد لـ Paul Michel (https://huggingface.co/papers/1905.10650):
|
||||
لإثراء هذا المجال الناشئ، قمنا بتضمين بعض الميزات الإضافية في نماذج BERT/GPT/GPT-2 للسماح للناس بالوصول إلى التمثيلات الداخلية، والتي تم تكييفها بشكل أساسي من العمل الرائد لـ Paul Michel (https://arxiv.org/abs/1905.10650):
|
||||
|
||||
- الوصول إلى جميع الحالات المخفية في BERT/GPT/GPT-2،
|
||||
- الوصول إلى جميع أوزان الانتباه لكل رأس في BERT/GPT/GPT-2،
|
||||
- استرجاع قيم ومشتقات مخرجات الرأس لحساب درجة أهمية الرأس وحذفه كما هو موضح في https://huggingface.co/papers/1905.10650.
|
||||
- استرجاع قيم ومشتقات مخرجات الرأس لحساب درجة أهمية الرأس وحذفه كما هو موضح في https://arxiv.org/abs/1905.10650.
|
||||
|
||||
ولمساعدتك على فهم واستخدام هذه الميزات بسهولة، أضفنا مثالًا برمجيًا محددًا: [bertology.py](https://github.com/huggingface/transformers-research-projects/tree/main/bertology/run_bertology.py) أثناء استخراج المعلومات وتقليص من نموذج تم تدريبه مسبقًا على GLUE.
|
@ -30,7 +30,7 @@ class ResnetConfig(PretrainedConfig):
|
||||
def __init__(
|
||||
self,
|
||||
block_type="bottleneck",
|
||||
layers: list[int] = [3, 4, 6, 3],
|
||||
layers: List[int] = [3, 4, 6, 3],
|
||||
num_classes: int = 1000,
|
||||
input_channels: int = 3,
|
||||
cardinality: int = 1,
|
||||
@ -280,7 +280,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
|
||||
الآن لإرسال النموذج إلى Hub، تأكد من تسجيل الدخول. إما تشغيل في المحطة الأوامر الطرفية الخاصة بك:
|
||||
|
||||
```bash
|
||||
hf auth login
|
||||
huggingface-cli login
|
||||
```
|
||||
|
||||
أو من دفتر ملاحظات:
|
||||
|
@ -77,7 +77,7 @@ model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
|
||||
|
||||
الآن لديك إمكانية الوصول إلى النسخة الكامل غير المكممة للنموذج في بيئة PyTorch، حيث يمكنك دمجه مع مجموعة كبيرة من الأدوات الأخرى.
|
||||
|
||||
لإعادة التحويل إلى ملف `gguf`، نوصي باستخدام ملف [`convert-hf-to-gguf.py`](https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py) من llama.cpp.
|
||||
لإعادة التحويل إلى ملف `gguf`، نوصي باستخدام ملف [`convert-hf-to-gguf.py`](https://github.com/ggerganov/llama.cpp/blob/master/convert-hf-to-gguf.py) من llama.cpp.
|
||||
|
||||
فيما يلي كيفية إكمال البرنامج النصي أعلاه لحفظ النموذج وإعادة تصديره مرة أخرى إلى `gguf`:
|
||||
|
||||
|
@ -135,7 +135,7 @@
|
||||
في كل وحدة الانتباه الباقية في المحولات، تلي طبقة الاهتمام الانتباه عادة طبقتان للتغذية الأمامية.
|
||||
حجم تضمين الطبقة الأمامية الوسيطة أكبر عادة من حجم المخفي للنموذج (على سبيل المثال، لـ
|
||||
`google-bert/bert-base-uncased`).
|
||||
بالنسبة لإدخال بحجم `[batch_size, sequence_length]`، يمكن أن تمثل الذاكرة المطلوبة لتخزين التضمينات الأمامية الوسيطة `[batch_size، sequence_length, config.intermediate_size]` جزءًا كبيرًا من استخدام الذاكرة. لاحظ مؤلفو (https://huggingface.co/papers/2001.04451)[Reformer: The Efficient Transformer] أنه نظرًا لأن الحساب مستقل عن بعد `sequence_length`، فإنه من المكافئ رياضيًا حساب تضمينات الإخراج الأمامية `[batch_size، config.hidden_size]_0, ..., [batch_size، `config_size]_n
|
||||
بالنسبة لإدخال بحجم `[batch_size, sequence_length]`، يمكن أن تمثل الذاكرة المطلوبة لتخزين التضمينات الأمامية الوسيطة `[batch_size، sequence_length, config.intermediate_size]` جزءًا كبيرًا من استخدام الذاكرة. لاحظ مؤلفو (https://arxiv.org/abs/2001.04451)[Reformer: The Efficient Transformer] أنه نظرًا لأن الحساب مستقل عن بعد `sequence_length`، فإنه من المكافئ رياضيًا حساب تضمينات الإخراج الأمامية `[batch_size، config.hidden_size]_0, ..., [batch_size، `config_size]_n
|
||||
فردياً والتوصيل بها لاحقًا إلى `[batch_size, sequence_length, config.hidden_size]` مع `n = sequence_length`، والذي يتداول زيادة وقت الحساب مقابل تقليل استخدام الذاكرة، ولكنه ينتج عنه نتيجة مكافئة رياضيا.
|
||||
|
||||
بالنسبة للنماذج التي تستخدم الدالة `[apply_chunking_to_forward]`، يحدد `chunk_size` عدد التضمينات يتم حساب الإخراج بالتوازي وبالتالي يحدد المقايضة بين حجم الذاكرة والتعقيد الوقت. إذا تم تعيين `chunk_size` إلى `0`، فلن يتم إجراء تجزئة التغذية الأمامية.
|
||||
@ -173,7 +173,7 @@
|
||||
|
||||
<Youtube id="VFp38yj8h3A"/>
|
||||
|
||||
يعمل كل محلل لغوي بشكل مختلف ولكن الآلية الأساسية تبقى كما هي. إليك مثال باستخدام محلل BERT اللغوي، والذي يعد محلل لغوي [WordPiece](https://huggingface.co/papers/1609.08144):
|
||||
يعمل كل محلل لغوي بشكل مختلف ولكن الآلية الأساسية تبقى كما هي. إليك مثال باستخدام محلل BERT اللغوي، والذي يعد محلل لغوي [WordPiece](https://arxiv.org/pdf/1609.08144.pdf):
|
||||
|
||||
```python
|
||||
>>> from transformers import BertTokenizer
|
||||
|
@ -6,7 +6,7 @@
|
||||
تحقق نماذج اللغة الكبيرة (LLMs) مثل GPT3/4، [Falcon](https://huggingface.co/tiiuae/falcon-40b)، و [Llama](https://huggingface.co/meta-llama/Llama-2-70b-hf) تقدمًا سريعًا في قدرتها على معالجة المهام التي تركز على الإنسان، مما يجعلها أدوات أساسية في الصناعات القائمة على المعرفة الحديثة.
|
||||
لا يزال نشر هذه النماذج في المهام الواقعية يمثل تحديًا، ومع ذلك:
|
||||
|
||||
- لكي تظهر نماذج اللغة الكبيرة قدرات فهم وتوليد النصوص قريبة من قدرات الإنسان، فإنها تتطلب حاليًا إلى تكوينها من مليارات المعلمات (انظر [كابلان وآخرون](https://huggingface.co/papers/2001.08361)، [وي وآخرون](https://huggingface.co/papers/2206.07682)). وهذا بدوره يزيد من متطلبات الذاكرة للاستدلال.
|
||||
- لكي تظهر نماذج اللغة الكبيرة قدرات فهم وتوليد النصوص قريبة من قدرات الإنسان، فإنها تتطلب حاليًا إلى تكوينها من مليارات المعلمات (انظر [كابلان وآخرون](https://arxiv.org/abs/2001.08361)، [وي وآخرون](https://arxiv.org/abs/2206.07682)). وهذا بدوره يزيد من متطلبات الذاكرة للاستدلال.
|
||||
- في العديد من المهام الواقعية، تحتاج نماذج اللغة الكبيرة إلى معلومات سياقية شاملة. يتطلب ذلك قدرة النموذج على إدارة تسلسلات إدخال طويلة للغاية أثناء الاستدلال.
|
||||
|
||||
يكمن جوهر صعوبة هذه التحديات في تعزيز القدرات الحسابية والذاكرة لنماذج اللغة الكبيرة، خاصة عند التعامل مع تسلسلات الإدخال الضخمة.
|
||||
@ -17,7 +17,7 @@
|
||||
|
||||
2. **اFlash Attention:** إن Flash Attention وهي نسخة مُعدَّلة من خوارزمية الانتباه التي لا توفر فقط نهجًا أكثر كفاءة في استخدام الذاكرة، ولكنها تحقق أيضًا كفاءة متزايدة بسبب الاستخدام الأمثل لذاكرة GPU.
|
||||
|
||||
3. **الابتكارات المعمارية:** حيث تم اقتراح هياكل متخصصة تسمح باستدلال أكثر فعالية نظرًا لأن نماذج اللغة الكبيرة يتم نشرها دائمًا بنفس الطريقة أثناء عملية الاستدلال، أي توليد النص التنبؤي التلقائي مع سياق الإدخال الطويل، فقد تم اقتراح بنيات نموذج متخصصة تسمح بالاستدلال الأكثر كفاءة. أهم تقدم في بنيات النماذج هنا هو [عذر](https://huggingface.co/papers/2108.12409)، [الترميز الدوار](https://huggingface.co/papers/2104.09864)، [الاهتمام متعدد الاستعلامات (MQA)](https://huggingface.co/papers/1911.02150) و [مجموعة الانتباه بالاستعلام (GQA)]((https://huggingface.co/papers/2305.13245)).
|
||||
3. **الابتكارات المعمارية:** حيث تم اقتراح هياكل متخصصة تسمح باستدلال أكثر فعالية نظرًا لأن نماذج اللغة الكبيرة يتم نشرها دائمًا بنفس الطريقة أثناء عملية الاستدلال، أي توليد النص التنبؤي التلقائي مع سياق الإدخال الطويل، فقد تم اقتراح بنيات نموذج متخصصة تسمح بالاستدلال الأكثر كفاءة. أهم تقدم في بنيات النماذج هنا هو [عذر](https://arxiv.org/abs/2108.12409)، [الترميز الدوار](https://arxiv.org/abs/2104.09864)، [الاهتمام متعدد الاستعلامات (MQA)](https://arxiv.org/abs/1911.02150) و [مجموعة الانتباه بالاستعلام (GQA)]((https://arxiv.org/abs/2305.13245)).
|
||||
|
||||
على مدار هذا الدليل، سنقدم تحليلًا للتوليد التنبؤي التلقائي من منظور المُوتِّرات. نتعمق في مزايا وعيوب استخدام دقة أقل، ونقدم استكشافًا شاملاً لخوارزميات الانتباه الأحدث، ونناقش بنيات نماذج نماذج اللغة الكبيرة المحسنة. سندعم الشرح بأمثلة عملية تُبرِز كل تحسين على حدة.
|
||||
|
||||
@ -152,8 +152,8 @@ from accelerate.utils import release_memory
|
||||
release_memory(model)
|
||||
```
|
||||
|
||||
والآن ماذا لو لم يكن لدى وحدة معالجة الرسومات (GPU) لديك 32 جيجا بايت من ذاكرة الفيديو العشوائية (VRAM)؟ لقد وجد أن أوزان النماذج يمكن تحويلها إلى 8 بتات أو 4 بتات دون خسارة كبيرة في الأداء (انظر [Dettmers et al.](https://huggingface.co/papers/2208.07339)).
|
||||
يمكن تحويل النموذج إلى 3 بتات أو 2 بتات مع فقدان مقبول في الأداء كما هو موضح في ورقة [GPTQ](https://huggingface.co/papers/2210.17323) 🤯.
|
||||
والآن ماذا لو لم يكن لدى وحدة معالجة الرسومات (GPU) لديك 32 جيجا بايت من ذاكرة الفيديو العشوائية (VRAM)؟ لقد وجد أن أوزان النماذج يمكن تحويلها إلى 8 بتات أو 4 بتات دون خسارة كبيرة في الأداء (انظر [Dettmers et al.](https://arxiv.org/abs/2208.07339)).
|
||||
يمكن تحويل النموذج إلى 3 بتات أو 2 بتات مع فقدان مقبول في الأداء كما هو موضح في ورقة [GPTQ](https://arxiv.org/abs/2210.17323) 🤯.
|
||||
|
||||
دون الدخول في الكثير من التفاصيل، تهدف مخططات التكميم إلى تخفيض دقة الأوزان مع محاولة الحفاظ على دقة نتائج النموذج كما هي (*أي* أقرب ما يمكن إلى bfloat16).
|
||||
لاحظ أن التكميم يعمل بشكل خاص جيدًا لتوليد النص حيث كل ما نهتم به هو اختيار *مجموعة الرموز الأكثر احتمالًا التالية* ولا نهتم حقًا بالقيم الدقيقة لتوزيع الرمز التالي *logit*.
|
||||
@ -231,7 +231,7 @@ flush()
|
||||
دعنا نرى ما هو استهلاك ذاكرة GPU الذروة الذي يوفره تكميم 4 بت. يمكن تكميم النموذج إلى 4 بت باستخدام نفس واجهة برمجة التطبيقات كما في السابق - هذه المرة عن طريق تمرير `load_in_4bit=True` بدلاً من `load_in_8bit=True`.
|
||||
|
||||
```python
|
||||
model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", load_in_4bit=True, pad_token_id=0)
|
||||
model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", load_in_4bit=True, low_cpu_mem_usage=True, pad_token_id=0)
|
||||
|
||||
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
||||
|
||||
@ -304,7 +304,7 @@ $$ \textbf{O} = \text{Attn}(\mathbf{X}) = \mathbf{V} \times \text{Softmax}(\math
|
||||
|
||||
مع تحسن LLMs في فهم النص وتوليد النص، يتم تطبيقها على مهام متزايدة التعقيد. في حين أن النماذج كانت تتعامل سابقًا مع ترجمة أو تلخيص بضع جمل، فإنها الآن تدير صفحات كاملة، مما يتطلب القدرة على معالجة أطوال إدخال واسعة.
|
||||
|
||||
كيف يمكننا التخلص من متطلبات الذاكرة الباهظة للتطويلات المدخلة الكبيرة؟ نحن بحاجة إلى طريقة جديدة لحساب آلية الاهتمام الذاتي التي تتخلص من مصفوفة \\( QK^T \\). [طريقه داو وآخرون.](https://huggingface.co/papers/2205.14135) طوروا بالضبط مثل هذا الخوارزمية الجديدة وأطلقوا عليها اسم **Flash Attention**.
|
||||
كيف يمكننا التخلص من متطلبات الذاكرة الباهظة للتطويلات المدخلة الكبيرة؟ نحن بحاجة إلى طريقة جديدة لحساب آلية الاهتمام الذاتي التي تتخلص من مصفوفة \\( QK^T \\). [طريقه داو وآخرون.](Https://arxiv.org/abs/2205.14135) طوروا بالضبط مثل هذا الخوارزمية الجديدة وأطلقوا عليها اسم **Flash Attention**.
|
||||
|
||||
باختصار، يكسر الاهتمام الفلاشي حساب \\( \mathbf{V} \times \operatorname{Softmax}(\mathbf{QK}^T\\)) ويحسب بدلاً من ذلك قطعًا أصغر من الإخراج عن طريق التكرار عبر العديد من خطوات حساب Softmax:
|
||||
|
||||
@ -318,7 +318,7 @@ $$ \textbf{O}_i \leftarrow s^a_{ij} * \textbf{O}_i + s^b_{ij} * \mathbf{V}_{j} \
|
||||
|
||||
> من خلال تتبع إحصائيات التطبيع softmax واستخدام بعض الرياضيات الذكية، يعطي Flash Attention **مخرجات متطابقة رقميًا** مقارنة بطبقة الاهتمام الذاتي الافتراضية بتكلفة ذاكرة لا تزيد خطيًا مع \\( N \\).
|
||||
|
||||
عند النظر إلى الصيغة، قد يقول المرء بديهيًا أن الاهتمام الفلاشي يجب أن يكون أبطأ بكثير مقارنة بصيغة الاهتمام الافتراضية حيث يلزم إجراء المزيد من الحسابات. في الواقع، يتطلب Flash Attention المزيد من عمليات الفاصلة العائمة مقارنة بالاهتمام العادي حيث يجب إعادة حساب إحصائيات التطبيع softmax باستمرار (راجع [الورقة](https://huggingface.co/papers/2205.14135) لمزيد من التفاصيل إذا كنت مهتمًا)
|
||||
عند النظر إلى الصيغة، قد يقول المرء بديهيًا أن الاهتمام الفلاشي يجب أن يكون أبطأ بكثير مقارنة بصيغة الاهتمام الافتراضية حيث يلزم إجراء المزيد من الحسابات. في الواقع، يتطلب Flash Attention المزيد من عمليات الفاصلة العائمة مقارنة بالاهتمام العادي حيث يجب إعادة حساب إحصائيات التطبيع softmax باستمرار (راجع [الورقة](https://arxiv.org/abs/2205.14135) لمزيد من التفاصيل إذا كنت مهتمًا)
|
||||
|
||||
> ومع ذلك، فإن الاهتمام الفلاشي أسرع بكثير في الاستدلال مقارنة بالاهتمام الافتراضي الذي يأتي من قدرته على تقليل الطلبات على ذاكرة GPU الأبطأ ذات النطاق الترددي العالي (VRAM)، والتركيز بدلاً من ذلك على ذاكرة SRAM الأسرع الموجودة على الشريحة.
|
||||
|
||||
@ -535,20 +535,20 @@ flush()
|
||||
لكي يفهم LLM ترتيب الجملة، يلزم وجود *إشارة* إضافية ويتم تطبيقها عادةً في شكل *الترميزات الموضعية* (أو ما يُطلق عليه أيضًا *الترميزات الموضعية*).
|
||||
لم يتم ترجمة النص الخاص والروابط وأكواد HTML وCSS بناءً على طلبك.
|
||||
|
||||
قدم مؤلفو الورقة البحثية [*Attention Is All You Need*](https://huggingface.co/papers/1706.03762) تضمينات موضعية جيبية مثلثية \\( \mathbf{P} = \mathbf{p}_1, \ldots, \mathbf{p}_N \\) حيث يتم حساب كل متجه \\( \mathbf{p}_i \\) كدالة جيبية لموضعه \\( i \\) .
|
||||
قدم مؤلفو الورقة البحثية [*Attention Is All You Need*](https://arxiv.org/abs/1706.03762) تضمينات موضعية جيبية مثلثية \\( \mathbf{P} = \mathbf{p}_1, \ldots, \mathbf{p}_N \\) حيث يتم حساب كل متجه \\( \mathbf{p}_i \\) كدالة جيبية لموضعه \\( i \\) .
|
||||
بعد ذلك يتم ببساطة إضافة التضمينات الموضعية إلى متجهات تسلسل الإدخال \\( \mathbf{\hat{X}} = \mathbf{\hat{x}}_1, \ldots, \mathbf{\hat{x}}_N \\) = \\( \mathbf{x}_1 + \mathbf{p}_1, \ldots, \mathbf{x}_N + \mathbf{p}_N \\) وبالتالي توجيه النموذج لتعلم ترتيب الجملة بشكل أفضل.
|
||||
|
||||
بدلاً من استخدام التضمينات الموضعية الثابتة، استخدم آخرون (مثل [Devlin et al.](https://huggingface.co/papers/1810.04805)) تضمينات موضعية مكتسبة يتم من خلالها تعلم التضمينات الموضعية \\( \mathbf{P} \\) أثناء التدريب.
|
||||
بدلاً من استخدام التضمينات الموضعية الثابتة، استخدم آخرون (مثل [Devlin et al.](https://arxiv.org/abs/1810.04805)) تضمينات موضعية مكتسبة يتم من خلالها تعلم التضمينات الموضعية \\( \mathbf{P} \\) أثناء التدريب.
|
||||
|
||||
كانت التضمينات الموضعية الجيبية والمكتسبة هي الطرق السائدة لترميز ترتيب الجملة في نماذج اللغة الكبيرة، ولكن تم العثور على بعض المشكلات المتعلقة بهذه التضمينات الموضعية:
|
||||
|
||||
1. التضمينات الموضعية الجيبية والمكتسبة هي تضمينات موضعية مطلقة، أي ترميز تضمين فريد لكل معرف موضعي: \\( 0, \ldots, N \\) . كما أظهر [Huang et al.](https://huggingface.co/papers/2009.13658) و [Su et al.](https://huggingface.co/papers/2104.09864)، تؤدي التضمينات الموضعية المطلقة إلى أداء ضعيف لنماذج اللغة الكبيرة للمدخلات النصية الطويلة. بالنسبة للمدخلات النصية الطويلة، يكون من المفيد إذا تعلم النموذج المسافة الموضعية النسبية التي تمتلكها رموز المدخلات إلى بعضها البعض بدلاً من موضعها المطلق.
|
||||
1. التضمينات الموضعية الجيبية والمكتسبة هي تضمينات موضعية مطلقة، أي ترميز تضمين فريد لكل معرف موضعي: \\( 0, \ldots, N \\) . كما أظهر [Huang et al.](https://arxiv.org/abs/2009.13658) و [Su et al.](https://arxiv.org/abs/2104.09864)، تؤدي التضمينات الموضعية المطلقة إلى أداء ضعيف لنماذج اللغة الكبيرة للمدخلات النصية الطويلة. بالنسبة للمدخلات النصية الطويلة، يكون من المفيد إذا تعلم النموذج المسافة الموضعية النسبية التي تمتلكها رموز المدخلات إلى بعضها البعض بدلاً من موضعها المطلق.
|
||||
2. عند استخدام التضمينات الموضعية المكتسبة، يجب تدريب نموذج اللغة الكبيرة على طول إدخال ثابت \\( N \\)، مما يجعل من الصعب الاستقراء إلى طول إدخال أطول مما تم تدريبه عليه.
|
||||
|
||||
في الآونة الأخيرة، أصبحت التضمينات الموضعية النسبية التي يمكنها معالجة المشكلات المذكورة أعلاه أكثر شعبية، وأبرزها:
|
||||
|
||||
- [تضمين الموضع الدوراني (RoPE)](https://huggingface.co/papers/2104.09864)
|
||||
- [ALiBi](https://huggingface.co/papers/2108.12409)
|
||||
- [تضمين الموضع الدوراني (RoPE)](https://arxiv.org/abs/2104.09864)
|
||||
- [ALiBi](https://arxiv.org/abs/2108.12409)
|
||||
|
||||
يؤكد كل من *RoPE* و *ALiBi* أنه من الأفضل توجيه نموذج اللغة الكبيرة حول ترتيب الجملة مباشرة في خوارزمية الانتباه الذاتي حيث يتم وضع رموز الكلمات في علاقة مع بعضها البعض. على وجه التحديد، يجب توجيه ترتيب الجملة عن طريق تعديل عملية \\( \mathbf{QK}^T \\) .
|
||||
|
||||
@ -563,14 +563,14 @@ $$ \mathbf{\hat{q}}_i^T \mathbf{\hat{x}}_j = \mathbf{{q}}_i^T \mathbf{R}_{\theta
|
||||
يستخدم *RoPE* في العديد من نماذج اللغة الكبيرة الأكثر أهمية اليوم، مثل:
|
||||
|
||||
- [**Falcon**](https://huggingface.co/tiiuae/falcon-40b)
|
||||
- [**Llama**](https://huggingface.co/papers/2302.13971)
|
||||
- [**PaLM**](https://huggingface.co/papers/2204.02311)
|
||||
- [**Llama**](https://arxiv.org/abs/2302.13971)
|
||||
- [**PaLM**](https://arxiv.org/abs/2204.02311)
|
||||
|
||||
كبديل، يقترح *ALiBi* مخطط ترميز موضعي نسبي أبسط بكثير. يتم إضافة المسافة النسبية التي تمتلكها رموز المدخلات إلى بعضها البعض كعدد صحيح سلبي مقياس بقيمة محددة مسبقًا `m` إلى كل إدخال استعلام-مفتاح لمصفوفة \\( \mathbf{QK}^T \\) مباشرة قبل حساب softmax.
|
||||
|
||||

|
||||
|
||||
كما هو موضح في ورقة [ALiBi](https://huggingface.co/papers/2108.12409)، يسمح هذا الترميز الموضعي النسبي البسيط للنموذج بالحفاظ على أداء عالٍ حتى في تسلسلات المدخلات النصية الطويلة جدًا.
|
||||
كما هو موضح في ورقة [ALiBi](https://arxiv.org/abs/2108.12409)، يسمح هذا الترميز الموضعي النسبي البسيط للنموذج بالحفاظ على أداء عالٍ حتى في تسلسلات المدخلات النصية الطويلة جدًا.
|
||||
|
||||
يُستخدم *ALiBi* في العديد من أهم نماذج اللغة الكبيرة المستخدمة اليوم، مثل:
|
||||
|
||||
@ -579,7 +579,7 @@ $$ \mathbf{\hat{q}}_i^T \mathbf{\hat{x}}_j = \mathbf{{q}}_i^T \mathbf{R}_{\theta
|
||||
|
||||
يمكن لكل من ترميزات الموضع *RoPE* و *ALiBi* الاستقراء إلى أطوال إدخال لم يتم ملاحظتها أثناء التدريب، في حين ثبت أن الاستقراء يعمل بشكل أفضل بكثير خارج الصندوق لـ *ALiBi* مقارنة بـ *RoPE*.
|
||||
بالنسبة لـ ALiBi، ما عليك سوى زيادة قيم مصفوفة الموضع المثلث السفلي لمطابقة طول تسلسل الإدخال.
|
||||
بالنسبة لـ *RoPE*، يؤدي الحفاظ على نفس \\( \theta \\) الذي تم استخدامه أثناء التدريب إلى نتائج سيئة عند تمرير إدخالات نصية أطول بكثير من تلك التي شوهدت أثناء التدريب، راجع [Press et al.](https://huggingface.co/papers/2108.12409). ومع ذلك، وجد المجتمع بعض الحيل الفعالة التي تقوم بتعديل \\( \theta \\)، مما يسمح لترميزات الموضع *RoPE* بالعمل بشكل جيد لتسلسلات إدخال النص المستقرئة (راجع [هنا](https://github.com/huggingface/transformers/pull/24653)).
|
||||
بالنسبة لـ *RoPE*، يؤدي الحفاظ على نفس \\( \theta \\) الذي تم استخدامه أثناء التدريب إلى نتائج سيئة عند تمرير إدخالات نصية أطول بكثير من تلك التي شوهدت أثناء التدريب، راجع [Press et al.](https://arxiv.org/abs/2108.12409). ومع ذلك، وجد المجتمع بعض الحيل الفعالة التي تقوم بتعديل \\( \theta \\)، مما يسمح لترميزات الموضع *RoPE* بالعمل بشكل جيد لتسلسلات إدخال النص المستقرئة (راجع [هنا](https://github.com/huggingface/transformers/pull/24653)).
|
||||
|
||||
> كل من RoPE و ALiBi عبارة عن ترميزات موضع نسبي *لا* يتم تعلمها أثناء التدريب، ولكن بدلاً من ذلك تستند إلى الحدس التالي:
|
||||
- يجب إعطاء الإشارات الموضعية حول إدخالات النص مباشرة إلى مصفوفة \\( QK^T \\) لطبقة الاهتمام الذاتي
|
||||
@ -755,21 +755,21 @@ Roughly 8 مليار قيمة عائمة! يتطلب تخزين 8 مليارات
|
||||
|
||||
#### 3.2.2 Multi-Query-Attention (MQA)
|
||||
|
||||
[Multi-Query-Attention](https://huggingface.co/papers/1911.02150) اقترحها Noam Shazeer في ورقته *Fast Transformer Decoding: One Write-Head is All You Need*. كما يقول العنوان، اكتشف Noam أنه بدلاً من استخدام `n_head` من أوزان إسقاط القيمة الرئيسية، يمكن استخدام زوج واحد من أوزان إسقاط رأس القيمة التي يتم مشاركتها عبر جميع رؤوس الاهتمام دون أن يتدهور أداء النموذج بشكل كبير.
|
||||
[Multi-Query-Attention](https://arxiv.org/abs/1911.02150) اقترحها Noam Shazeer في ورقته *Fast Transformer Decoding: One Write-Head is All You Need*. كما يقول العنوان، اكتشف Noam أنه بدلاً من استخدام `n_head` من أوزان إسقاط القيمة الرئيسية، يمكن استخدام زوج واحد من أوزان إسقاط رأس القيمة التي يتم مشاركتها عبر جميع رؤوس الاهتمام دون أن يتدهور أداء النموذج بشكل كبير.
|
||||
|
||||
> باستخدام زوج واحد من أوزان إسقاط رأس القيمة، يجب أن تكون متجهات القيمة الرئيسية \\( \mathbf{k}_i، \mathbf{v}_i \\) متطابقة عبر جميع رؤوس الاهتمام والتي بدورها تعني أننا بحاجة فقط إلى تخزين زوج إسقاط قيمة رئيسي واحد في ذاكرة التخزين المؤقت بدلاً من `n_head` منها.
|
||||
|
||||
نظرًا لأن معظم LLMs تستخدم ما بين 20 و100 رأس اهتمام، فإن MQA يقلل بشكل كبير من استهلاك الذاكرة لذاكرة التخزين المؤقت key-value. بالنسبة إلى LLM المستخدم في هذا الدفتر، يمكننا تقليل استهلاك الذاكرة المطلوبة من 15 جيجابايت إلى أقل من 400 ميجابايت عند طول تسلسل الإدخال 16000.
|
||||
|
||||
بالإضافة إلى توفير الذاكرة، يؤدي MQA أيضًا إلى تحسين الكفاءة الحسابية كما هو موضح في ما يلي.
|
||||
في فك التشفير التلقائي، يجب إعادة تحميل متجهات القيمة الرئيسية الكبيرة، ودمجها مع زوج متجه القيمة الحالي، ثم إدخالها في \\( \mathbf{q}_c\mathbf{K}^T \\) الحساب في كل خطوة. بالنسبة لفك التشفير التلقائي، يمكن أن تصبح عرض النطاق الترددي للذاكرة المطلوبة لإعادة التحميل المستمر عنق زجاجة زمنيًا خطيرًا. من خلال تقليل حجم متجهات القيمة الرئيسية، يجب الوصول إلى ذاكرة أقل، وبالتالي تقليل عنق الزجاجة في عرض النطاق الترددي للذاكرة. لمزيد من التفاصيل، يرجى إلقاء نظرة على [ورقة Noam](https://huggingface.co/papers/1911.02150).
|
||||
في فك التشفير التلقائي، يجب إعادة تحميل متجهات القيمة الرئيسية الكبيرة، ودمجها مع زوج متجه القيمة الحالي، ثم إدخالها في \\( \mathbf{q}_c\mathbf{K}^T \\) الحساب في كل خطوة. بالنسبة لفك التشفير التلقائي، يمكن أن تصبح عرض النطاق الترددي للذاكرة المطلوبة لإعادة التحميل المستمر عنق زجاجة زمنيًا خطيرًا. من خلال تقليل حجم متجهات القيمة الرئيسية، يجب الوصول إلى ذاكرة أقل، وبالتالي تقليل عنق الزجاجة في عرض النطاق الترددي للذاكرة. لمزيد من التفاصيل، يرجى إلقاء نظرة على [ورقة Noam](https://arxiv.org/abs/1911.02150).
|
||||
|
||||
الجزء المهم الذي يجب فهمه هنا هو أن تقليل عدد رؤوس الاهتمام بالقيمة الرئيسية إلى 1 لا معنى له إلا إذا تم استخدام ذاكرة التخزين المؤقت للقيمة الرئيسية. يظل الاستهلاك الذروي لذاكرة النموذج لمرور واحد للأمام بدون ذاكرة التخزين المؤقت للقيمة الرئيسية دون تغيير لأن كل رأس اهتمام لا يزال لديه متجه استعلام فريد بحيث يكون لكل رأس اهتمام مصفوفة \\( \mathbf{QK}^T \\) مختلفة.
|
||||
|
||||
شهدت MQA اعتمادًا واسع النطاق من قبل المجتمع ويتم استخدامها الآن بواسطة العديد من LLMs الأكثر شهرة:
|
||||
|
||||
- [**Falcon**](https://huggingface.co/tiiuae/falcon-40b)
|
||||
- [**PaLM**](https://huggingface.co/papers/2204.02311)
|
||||
- [**PaLM**](https://arxiv.org/abs/2204.02311)
|
||||
- [**MPT**](https://huggingface.co/mosaicml/mpt-30b)
|
||||
- [**BLOOM**](https://huggingface.co/bigscience/bloom)
|
||||
|
||||
@ -777,7 +777,7 @@ Roughly 8 مليار قيمة عائمة! يتطلب تخزين 8 مليارات
|
||||
|
||||
#### 3.2.3 مجموعة الاستعلام الاهتمام (GQA)
|
||||
|
||||
[مجموعة الاستعلام الاهتمام](https://huggingface.co/papers/2305.13245)، كما اقترح Ainslie et al. من Google، وجد أن استخدام MQA يمكن أن يؤدي غالبًا إلى تدهور الجودة مقارنة باستخدام إسقاطات رأس القيمة الرئيسية المتعددة. تجادل الورقة بأنه يمكن الحفاظ على أداء النموذج بشكل أكبر عن طريق تقليل عدد أوزان إسقاط رأس الاستعلام بشكل أقل حدة. بدلاً من استخدام وزن إسقاط قيمة رئيسية واحدة فقط، يجب استخدام `n <n_head` أوزان إسقاط قيمة رئيسية. من خلال اختيار `n` إلى قيمة أقل بكثير من `n_head`، مثل 2 أو 4 أو 8، يمكن الاحتفاظ بمعظم مكاسب الذاكرة والسرعة من MQA مع التضحية بقدر أقل من سعة النموذج وبالتالي، من المفترض، أقل أداء.
|
||||
[مجموعة الاستعلام الاهتمام](https://arxiv.org/abs/2305.13245)، كما اقترح Ainslie et al. من Google، وجد أن استخدام MQA يمكن أن يؤدي غالبًا إلى تدهور الجودة مقارنة باستخدام إسقاطات رأس القيمة الرئيسية المتعددة. تجادل الورقة بأنه يمكن الحفاظ على أداء النموذج بشكل أكبر عن طريق تقليل عدد أوزان إسقاط رأس الاستعلام بشكل أقل حدة. بدلاً من استخدام وزن إسقاط قيمة رئيسية واحدة فقط، يجب استخدام `n <n_head` أوزان إسقاط قيمة رئيسية. من خلال اختيار `n` إلى قيمة أقل بكثير من `n_head`، مثل 2 أو 4 أو 8، يمكن الاحتفاظ بمعظم مكاسب الذاكرة والسرعة من MQA مع التضحية بقدر أقل من سعة النموذج وبالتالي، من المفترض، أقل أداء.
|
||||
|
||||
علاوة على ذلك، اكتشف مؤلفو GQA أنه يمكن *تدريب* نقاط تفتيش النموذج الموجودة ليكون لها بنية GQA باستخدام 5% فقط من الحوسبة الأصلية للتعليم المسبق. في حين أن 5% من الحوسبة الأصلية للتعليم المسبق يمكن أن تكون كمية هائلة، يسمح GQA *uptraining* بنقاط تفتيش موجودة للاستفادة من تسلسلات الإدخال الأطول.
|
||||
|
||||
@ -789,7 +789,7 @@ Roughly 8 مليار قيمة عائمة! يتطلب تخزين 8 مليارات
|
||||
|
||||
## الخاتمة
|
||||
|
||||
مجتمع البحث يأتي باستمرار بطرق جديدة ومبتكرة لتسريع وقت الاستدلال للنماذج اللغوية الكبيرة على الإطلاق. كمثال، أحد اتجاهات البحث الواعدة هو [فك التشفير التخميني](https://huggingface.co/papers/2211.17192) حيث تقوم "الرموز السهلة" بإنشائها نماذج اللغة الأصغر والأسرع ويتم إنشاء "الرموز الصعبة" فقط بواسطة LLM نفسه. إن التعمق في التفاصيل يتجاوز نطاق هذا الدفتر، ولكن يمكن قراءته في هذه [تدوينة المدونة اللطيفة](https://huggingface.co/blog/assisted-generation).
|
||||
مجتمع البحث يأتي باستمرار بطرق جديدة ومبتكرة لتسريع وقت الاستدلال للنماذج اللغوية الكبيرة على الإطلاق. كمثال، أحد اتجاهات البحث الواعدة هو [فك التشفير التخميني](https://arxiv.org/abs/2211.17192) حيث تقوم "الرموز السهلة" بإنشائها نماذج اللغة الأصغر والأسرع ويتم إنشاء "الرموز الصعبة" فقط بواسطة LLM نفسه. إن التعمق في التفاصيل يتجاوز نطاق هذا الدفتر، ولكن يمكن قراءته في هذه [تدوينة المدونة اللطيفة](https://huggingface.co/blog/assisted-generation).
|
||||
|
||||
السبب في أن LLMs الضخمة مثل GPT3/4، وLlama-2-70b، وClaude، وPaLM يمكن أن تعمل بسرعة كبيرة في واجهات الدردشة مثل [Hugging Face Chat](https://huggingface.co/chat/) أو ChatGPT يرجع إلى حد كبير إلى التحسينات المذكورة أعلاه في الدقة والخوارزميات والهندسة المعمارية.
|
||||
في المستقبل، ستكون أجهزة التسريع مثل وحدات معالجة الرسومات (GPUs) ووحدات معالجة الرسومات (TPUs)، وما إلى ذلك... ستكون أسرع فقط وستسمح بمزيد من الذاكرة، ولكن يجب دائمًا التأكد من استخدام أفضل الخوارزميات والهندسة المعمارية المتاحة للحصول على أكبر قدر من المال
|
||||
|
@ -165,7 +165,7 @@ default_args = {
|
||||
|
||||
يمكن أن تكون هذه المعرفة مفيدة لمعرفة عند تحليل اختناقات الأداء.
|
||||
|
||||
هذا الملخص مُشتق من [نقل البيانات هو كل ما تحتاجه: دراسة حالة حول تحسين المحولات 2020](https://huggingface.co/papers/2007.00072)
|
||||
هذا الملخص مُشتق من [نقل البيانات هو كل ما تحتاجه: دراسة حالة حول تحسين المحولات 2020](https://arxiv.org/abs/2007.00072)
|
||||
|
||||
|
||||
## تشريح ذاكرة النموذج
|
||||
|
@ -41,7 +41,7 @@ picture-in-picture" allowfullscreen></iframe>
|
||||
قبل مشاركة نموذج على Hub، ستحتاج إلى بيانات اعتماد حساب Hugging Face الخاصة بك. إذا كنت تستخدم منصة الأوامر، فقم بتشغيل الأمر التالي في بيئة افتراضية حيث تم تثبيت 🤗 Transformers. سيقوم هذا الأمر بتخزين رمز الدخول الخاص بك في مجلد تخزين المؤقت لـ Hugging Face (`~/.cache/` بشكل افتراضي):
|
||||
|
||||
```bash
|
||||
hf auth login
|
||||
huggingface-cli login
|
||||
```
|
||||
|
||||
إذا كنت تستخدم دفتر ملاحظات مثل Jupyter أو Colaboratory، فتأكد من تثبيت مكتبة [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library). تسمح لك هذه المكتبة بالتفاعل برمجيًا مع Hub.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# عائلة نماذج المحول
|
||||
|
||||
منذ إطلاقه في عام 2017، ألهم نموذج [المحول الأصلي](https://huggingface.co/papers/1706.03762) (راجع مدونة [المحول المشروح](http://nlp.seas.harvard.edu/2018/04/03/attention.html) لمقدمة تقنية مبسطة)، ألهم العديد من النماذج الجديدة والمبتكرة التي تتجاوز مهام معالجة اللغات الطبيعية (NLP). هناك نماذج للتنبؤ [بالبنية البروتينات المطوية](https://huggingface.co/blog/deep-learning-with-proteins)، و[تدريب على اتخاذ القرار](https://huggingface.co/blog/train-decision-transformers)، و[التنبؤ بالسلاسل الزمنية](https://huggingface.co/blog/time-series-transformers). مع وجود العديد من متغيرات المحول المتاحة، قد يكون من السهل أن تفوتك الصورة الأكبر. ما تشترك فيه جميع هذه النماذج هو أنها تستند إلى بنية المحول الأصلية. تستخدم بعض النماذج فقط الترميز أو فك الترميز، بينما تستخدم نماذج أخرى كليهما. يوفر هذا تصنيفًا مفيدًا لتصنيف واستعراض الفروقات الرئيسية بين نماذج عائلة المحولات، وسيساعدك على فهم النماذج التي لم تصادفها من قبل.
|
||||
منذ إطلاقه في عام 2017، ألهم نموذج [المحول الأصلي](https://arxiv.org/abs/1706.03762) (راجع مدونة [المحول المشروح](http://nlp.seas.harvard.edu/2018/04/03/attention.html) لمقدمة تقنية مبسطة)، ألهم العديد من النماذج الجديدة والمبتكرة التي تتجاوز مهام معالجة اللغات الطبيعية (NLP). هناك نماذج للتنبؤ [بالبنية البروتينات المطوية](https://huggingface.co/blog/deep-learning-with-proteins)، و[تدريب على اتخاذ القرار](https://huggingface.co/blog/train-decision-transformers)، و[التنبؤ بالسلاسل الزمنية](https://huggingface.co/blog/time-series-transformers). مع وجود العديد من متغيرات المحول المتاحة، قد يكون من السهل أن تفوتك الصورة الأكبر. ما تشترك فيه جميع هذه النماذج هو أنها تستند إلى بنية المحول الأصلية. تستخدم بعض النماذج فقط الترميز أو فك الترميز، بينما تستخدم نماذج أخرى كليهما. يوفر هذا تصنيفًا مفيدًا لتصنيف واستعراض الفروقات الرئيسية بين نماذج عائلة المحولات، وسيساعدك على فهم النماذج التي لم تصادفها من قبل.
|
||||
|
||||
إذا لم تكن على دراية بنموذج المحول الأصلي أو تحتاج إلى تذكير، فراجع الفصل الخاص بـ [كيف تعمل المحولات](https://huggingface.co/course/chapter1/4؟fw=pt) من دورة Hugging Face.
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
### الشبكة التلافيفية (Convolutional network)
|
||||
|
||||
لطالما كانت الشبكات التلافيفية (CNNs) الطريقة السائدة لمهام رؤية الحاسب حتى برز [محول الرؤية](https://huggingface.co/papers/2010.11929) قابليته للتطوير وكفاءته العالية. وحتى بعد ذلك، لا تزال بعض أفضل صفات CNN، مثل ثبات الإزاحة، قوية جدًا (خاصة بالنسبة لمهام معينة) لدرجة أن بعض المحولات تدمج التلافيف في بنيتها. قلب [ConvNeXt](model_doc/convnext) هذا التبادل رأسًا على عقب وأدرج خيارات التصميم من المحولات لتحديث CNN. على سبيل المثال، يستخدم ConvNeXt نوافذ منزلقة غير متداخلة لتقسيم الصورة إلى رقع وزيادة حقل مجال العام الخاص بها. كما يقوم ConvNeXt بعدة خيارات مثل تصميم الطبقة لتكون أكثر كفاءة في الذاكرة وتحسين الأداء، مما يجعله منافسًا قويًا للمحولات!
|
||||
لطالما كانت الشبكات التلافيفية (CNNs) الطريقة السائدة لمهام رؤية الحاسب حتى برز [محول الرؤية](https://arxiv.org/abs/2010.11929) قابليته للتطوير وكفاءته العالية. وحتى بعد ذلك، لا تزال بعض أفضل صفات CNN، مثل ثبات الإزاحة، قوية جدًا (خاصة بالنسبة لمهام معينة) لدرجة أن بعض المحولات تدمج التلافيف في بنيتها. قلب [ConvNeXt](model_doc/convnext) هذا التبادل رأسًا على عقب وأدرج خيارات التصميم من المحولات لتحديث CNN. على سبيل المثال، يستخدم ConvNeXt نوافذ منزلقة غير متداخلة لتقسيم الصورة إلى رقع وزيادة حقل مجال العام الخاص بها. كما يقوم ConvNeXt بعدة خيارات مثل تصميم الطبقة لتكون أكثر كفاءة في الذاكرة وتحسين الأداء، مما يجعله منافسًا قويًا للمحولات!
|
||||
|
||||
### الترميز[[cv-encoder]] (Encoder)
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
|
||||
نموذج [BERT](model_doc/bert) هو محوّل (Transformer) يعتمد على الترميز فقط يقوم بشكل عشوائي بإخفاء رموز معينة في المدخلات لتجنب رؤية باقى الرموز الأخرى، مما يسمح له "بالغش". يتمثل هدف التدريب المسبق في التنبؤ بالرمز المخفي بناءً على السياق. يسمح هذا لـ BERT باستخدام السياقات اليمنى واليسرى بالكامل لمساعدته في تعلم تمثيل أعمق وأغنى للبيانات المدخلة. ومع ذلك، كان هناك مجال للتحسين في استراتيجية التدريب المسبق لـ BERT. نموذج [RoBERTa](model_doc/roberta) اضاف تحسين من خلال تقديم وصفة تدريب مسبق جديدة تشمل التدريب لفترة أطول وعلى دفعات أكبر، وإخفاء الرموز عشوائيًا في كل حقبة بدلاً من مرة واحدة فقط أثناء المعالجة المسبقة، وإزالة هدف التنبؤ بالجملة التالية.
|
||||
|
||||
تتمثل الاستراتيجية السائدة لتحسين الأداء في زيادة حجم النموذج. ولكن تدريب النماذج الكبيرة مكلف من الناحية الحسابية. إحدى طرق تقليل التكاليف الحسابية هي استخدام نموذج أصغر مثل [DistilBERT](model_doc/distilbert). يستخدم DistilBERT [ تقنية تقطير المعرفة](https://huggingface.co/papers/1503.02531) - وهي تقنية ضغط - لإنشاء نموذج أصغر من BERT مع الحفاظ على معظم قدراته على فهم اللغةا.
|
||||
تتمثل الاستراتيجية السائدة لتحسين الأداء في زيادة حجم النموذج. ولكن تدريب النماذج الكبيرة مكلف من الناحية الحسابية. إحدى طرق تقليل التكاليف الحسابية هي استخدام نموذج أصغر مثل [DistilBERT](model_doc/distilbert). يستخدم DistilBERT [ تقنية تقطير المعرفة](https://arxiv.org/abs/1503.02531) - وهي تقنية ضغط - لإنشاء نموذج أصغر من BERT مع الحفاظ على معظم قدراته على فهم اللغةا.
|
||||
|
||||
مرت معظم نماذج المحول في الاتجاه نحو المزيد من المعلمات، مما أدى إلى ظهور نماذج جديدة تركز على تحسين كفاءة التدريب. يقلّل [ALBERT](model_doc/albert) من استهلاك الذاكرة عن طريق تقليل عدد المعلمات بطريقتين: فصل تضمين المفردات الأكبر إلى مصفوفتين أصغر والسماح للمستويات بمشاركة المعلمات. أضاف [DeBERTa](model_doc/deberta) آلية انتباه منفصلة حيث يتم ترميز الكلمة وموضعها بشكل منفصل في متجهين. يتم حساب الانتباه من هذه المتجهات المنفصلة بدلاً من متجه واحد يحتوي على تضمين الكلمة والموقع. ركز [Longformer](model_doc/longformer) أيضًا على جعل الانتباه أكثر كفاءة، خاصة لمعالجة المستندات ذات تسلسلات أطولل. فهو يستخدم مزيجًا من انتباه النوافذ المحلية (يتم حساب الانتباه فقط ن نافذة ذات حجم ثابت حول كل رمز) والانتباه العام (فقط لرموز مهمة محددة مثل `[CLS]` للتصنيف) لإنشاء مصفوفة انتباه متفرقة بدلاً من مصفوفة انتباه كاملة.
|
||||
|
||||
|
@ -33,7 +33,7 @@ pip install git+https://github.com/huggingface/peft.git
|
||||
|
||||
- [محولات الرتبة المنخفضة](https://huggingface.co/docs/peft/conceptual_guides/lora)
|
||||
- [IA3](https://huggingface.co/docs/peft/conceptual_guides/ia3)
|
||||
- [AdaLoRA](https://huggingface.co/papers/2303.10512)
|
||||
- [AdaLoRA](https://arxiv.org/abs/2303.10512)
|
||||
|
||||
إذا كنت تريد استخدام طرق PEFT الأخرى، مثل تعلم المحث أو ضبط المحث، أو حول مكتبة 🤗 PEFT بشكل عام، يرجى الرجوع إلى [الوثائق](https://huggingface.co/docs/peft/index).
|
||||
|
||||
|
@ -324,7 +324,7 @@ python examples/pytorch/summarization/run_summarization.py
|
||||
يمكن لجميع النصوص البرمجية رفع نموذجك النهائي إلى [مركز النماذج](https://huggingface.co/models). تأكد من تسجيل الدخول إلى Hugging Face قبل البدء:
|
||||
|
||||
```bash
|
||||
hf auth login
|
||||
huggingface-cli login
|
||||
```
|
||||
|
||||
ثم أضف المعلمة `push_to_hub` إلى النص البرمجي . ستقوم هذه المعلمة بإنشاء مستودع باستخدام اسم مستخدم Hugging Face واسم المجلد المحدد في `output_dir`.
|
||||
|
@ -103,7 +103,7 @@
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/convolution.gif"/>
|
||||
</div>
|
||||
|
||||
<small>عملية التفاف أساسية بدون حشو أو خطو خطوة واسعة، مأخوذة من <a href="https://huggingface.co/papers/1603.07285">دليل لحساب الالتفاف للتعلم العميق.</a></small>
|
||||
<small>عملية التفاف أساسية بدون حشو أو خطو خطوة واسعة، مأخوذة من <a href="https://arxiv.org/abs/1603.07285">دليل لحساب الالتفاف للتعلم العميق.</a></small>
|
||||
|
||||
يمكنك تغذية هذا الناتج إلى طبقة التفاف أخرى، ومع كل طبقة متتالية، تتعلم الشبكة أشياء أكثر تعقيدًا وتجريدية مثل النقانق أو الصواريخ. بين طبقات الالتفاف، من الشائع إضافة طبقة تجميع لتقليل الأبعاد وجعل النموذج أكثر قوة للتغيرات في موضع الميزة.
|
||||
|
||||
|
@ -94,7 +94,7 @@
|
||||
|
||||
### ترميز الأزواج البايتية (BPE)
|
||||
|
||||
تم تقديم رميز أزواج البايت (BPE) في ورقة بحثية بعنوان [الترجمة الآلية العصبية للكلمات النادرة باستخدام وحدات subword (Sennrich et al.، 2015)](https://huggingface.co/papers/1508.07909). يعتمد BPE على مُجزّئ أولي يقسم بيانات التدريب إلى
|
||||
تم تقديم رميز أزواج البايت (BPE) في ورقة بحثية بعنوان [الترجمة الآلية العصبية للكلمات النادرة باستخدام وحدات subword (Sennrich et al.، 2015)](https://arxiv.org/abs/1508.07909). يعتمد BPE على مُجزّئ أولي يقسم بيانات التدريب إلى
|
||||
كلمات. يمكن أن يكون التحليل المسبق بسيطًا مثل التقسيم المكاني، على سبيل المثال [GPT-2](model_doc/gpt2)، [RoBERTa](model_doc/roberta). تشمل التقسيم الأكثر تقدمًا معتمد على التحليل القائم على القواعد، على سبيل المثال [XLM](model_doc/xlm)، [FlauBERT](model_doc/flaubert) الذي يستخدم Moses لمعظم اللغات، أو [GPT](model_doc/openai-gpt) الذي يستخدم spaCy و ftfy، لحساب تكرار كل كلمة في مجموعة بيانات التدريب.
|
||||
|
||||
بعد التحليل المسبق، يتم إنشاء مجموعة من الكلمات الفريدة وقد تم تحديد تكرار كل كلمة في تم تحديد بيانات التدريب. بعد ذلك، يقوم BPE بإنشاء مفردات أساسية تتكون من جميع الرموز التي تحدث في مجموعة الكلمات الفريدة ويتعلم قواعد الدمج لتشكيل رمز جديد من رمزين من المفردات الأساسية. إنه يفعل ذلك حتى تصل المفردات إلى حجم المفردات المطلوب. لاحظ أن حجم المفردات هو فرط معلمة لتحديد قبل تدريب مُجزّئ النصوص.
|
||||
@ -158,7 +158,7 @@ BPE. أولاً، يقوم WordPiece بتكوين المفردات لتضمين
|
||||
### Unigram
|
||||
|
||||
Unigram هو خوارزمية توكنيز subword التي تم تقديمها في [تنظيم subword: تحسين نماذج الترجمة الشبكة العصبية
|
||||
نماذج مع مرشحين subword متعددة (Kudo، 2018)](https://huggingface.co/papers/1804.10959). على عكس BPE أو
|
||||
نماذج مع مرشحين subword متعددة (Kudo، 2018)](https://arxiv.org/pdf/1804.10959.pdf). على عكس BPE أو
|
||||
WordPiece، يقوم Unigram بتكوين مفرداته الأساسية إلى عدد كبير من الرموز ويقللها تدريجياً للحصول على مفردات أصغر. يمكن أن تتوافق المفردات الأساسية على سبيل المثال مع جميع الكلمات المسبقة التوكنز والسلاسل الفرعية الأكثر شيوعًا. لا يتم استخدام Unigram مباشرة لأي من النماذج في المحولات، ولكنه يستخدم بالاقتران مع [SentencePiece](#sentencepiece).
|
||||
|
||||
في كل خطوة تدريب، يحدد خوارزمية Unigram خسارة (غالبًا ما يتم تعريفها على أنها اللوغاريتم) عبر بيانات التدريب بالنظر إلى المفردات الحالية ونموذج اللغة unigram. بعد ذلك، بالنسبة لكل رمز في المفردات، يحسب الخوارزمية مقدار زيادة الخسارة الإجمالية إذا تم إزالة الرمز من المفردات. ثم يقوم Unigram بإزالة p (مع p عادة ما تكون 10% أو 20%) في المائة من الرموز التي تكون زيادة الخسارة فيها هي الأدنى، *أي* تلك
|
||||
@ -188,7 +188,7 @@ $$\mathcal{L} = -\sum_{i=1}^{N} \log \left ( \sum_{x \in S(x_{i})} p(x) \right )
|
||||
|
||||
تحتوي جميع خوارزميات توكنز الموصوفة حتى الآن على نفس المشكلة: من المفترض أن النص المدخل يستخدم المسافات لفصل الكلمات. ومع ذلك، لا تستخدم جميع اللغات المسافات لفصل الكلمات. أحد الحلول الممكنة هو استخداممعالج مسبق للغة محدد، *مثال* [XLM](model_doc/xlm) يلذي يستخدم معالجات مسبقة محددة للصينية واليابانية والتايلاندية.
|
||||
لحل هذه المشكلة بشكل أعم، [SentencePiece: A simple and language independent subword tokenizer and
|
||||
detokenizer for Neural Text Processing (Kudo et al.، 2018)](https://huggingface.co/papers/1808.06226) يتعامل مع المدخلات
|
||||
detokenizer for Neural Text Processing (Kudo et al.، 2018)](https://arxiv.org/pdf/1808.06226.pdf) يتعامل مع المدخلات
|
||||
كتدفق بيانات خام، وبالتالي يشمل المسافة في مجموعة الأحرف التي سيتم استخدامها. ثم يستخدم خوارزمية BPE أو unigram
|
||||
لبناء المفردات المناسبة.
|
||||
|
||||
|
@ -306,48 +306,78 @@ pip install galore-torch
|
||||
ثم أضف ببساطة أحد `["galore_adamw"، "galore_adafactor"، "galore_adamw_8bit"]` في `optim` جنبًا إلى جنب مع `optim_target_modules`، والتي يمكن أن تكون قائمة من السلاسل أو التعبيرات النمطية regex أو المسار الكامل المطابق لأسماء الوحدات المستهدفة التي تريد تكييفها. فيما يلي مثال على النص البرمجي كامل(تأكد من `pip install trl datasets`):
|
||||
|
||||
```python
|
||||
import torch
|
||||
import datasets
|
||||
from trl import SFTConfig, SFTTrainer
|
||||
import trl
|
||||
|
||||
from transformers import TrainingArguments, AutoConfig, AutoTokenizer, AutoModelForCausalLM
|
||||
|
||||
train_dataset = datasets.load_dataset('imdb', split='train')
|
||||
args = SFTConfig(
|
||||
output_dir="./test-galore",
|
||||
|
||||
args = TrainingArguments(
|
||||
output_dir="./test-galore"،
|
||||
max_steps=100,
|
||||
optim="galore_adamw",
|
||||
optim_target_modules=[r".*.attn.*", r".*.mlp.*"],
|
||||
gradient_checkpointing=True,
|
||||
per_device_train_batch_size=2,
|
||||
optim="galore_adamw"،
|
||||
optim_target_modules=[r".*.attn.*"، r".*.mlp.*"]
|
||||
)
|
||||
trainer = SFTTrainer(
|
||||
model="google/gemma-2b",
|
||||
|
||||
model_id = "google/gemma-2b"
|
||||
|
||||
config = AutoConfig.from_pretrained(model_id)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_config(config).to(0)
|
||||
|
||||
trainer = trl.SFTTrainer(
|
||||
model=model,
|
||||
args=args,
|
||||
train_dataset=train_dataset,
|
||||
dataset_text_field='text',
|
||||
max_seq_length=512,
|
||||
)
|
||||
|
||||
trainer.train()
|
||||
```
|
||||
|
||||
لتمرير معامﻻت إضافية يدعمها GaLore، يجب عليك تمرير `optim_args` بشكل صحيح، على سبيل المثال:
|
||||
|
||||
```python
|
||||
import torch
|
||||
import datasets
|
||||
from trl import SFTConfig, SFTTrainer
|
||||
import trl
|
||||
|
||||
from transformers import TrainingArguments, AutoConfig, AutoTokenizer, AutoModelForCausalLM
|
||||
|
||||
train_dataset = datasets.load_dataset('imdb', split='train')
|
||||
args = SFTConfig(
|
||||
|
||||
args = TrainingArguments(
|
||||
output_dir="./test-galore",
|
||||
max_steps=100,
|
||||
per_device_train_batch_size=2,
|
||||
optim="galore_adamw",
|
||||
optim_target_modules=[r".*.attn.*", r".*.mlp.*"],
|
||||
optim_args="rank=64, update_proj_gap=100, scale=0.10",
|
||||
gradient_checkpointing=True,
|
||||
)
|
||||
trainer = SFTTrainer(
|
||||
model="google/gemma-2b",
|
||||
|
||||
model_id = "google/gemma-2b"
|
||||
|
||||
config = AutoConfig.from_pretrained(model_id)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_config(config).to(0)
|
||||
|
||||
trainer = trl.SFTTrainer(
|
||||
model=model,
|
||||
args=args,
|
||||
train_dataset=train_dataset,
|
||||
dataset_text_field='text',
|
||||
max_seq_length=512,
|
||||
)
|
||||
|
||||
trainer.train()
|
||||
```
|
||||
يمكنك قراءة المزيد حول الطريقة في [المستودع الأصلي](https://github.com/jiaweizzhao/GaLore) أو [الورقة البحثية](https://huggingface.co/papers/2403.03507).
|
||||
يمكنك قراءة المزيد حول الطريقة في [المستودع الأصلي](https://github.com/jiaweizzhao/GaLore) أو [الورقة البحثية](https://arxiv.org/abs/2403.03507).
|
||||
|
||||
حاليًا، يمكنك فقط تدريب الطبقات الخطية التي تعتبر طبقات GaLore وستستخدم التحلل ذو الرتبة المنخفضة للتدريب بينما سيتم تحسين الطبقات المتبقية بالطريقة التقليدية.
|
||||
|
||||
@ -356,22 +386,37 @@ trainer.train()
|
||||
يمكنك أيضًا إجراء تحسين طبقة تلو الأخرى عن طريق إضافة `layerwise` إلى اسم المُحسِّن كما هو موضح أدناه:
|
||||
|
||||
```python
|
||||
import torch
|
||||
import datasets
|
||||
from trl import SFTConfig, SFTTrainer
|
||||
import trl
|
||||
|
||||
train_dataset = datasets.load_dataset('imdb', split='train')
|
||||
args = SFTConfig(
|
||||
output_dir="./test-galore",
|
||||
max_steps=100,
|
||||
optim="galore_adamw_layerwise",
|
||||
optim_target_modules=[r".*.attn.*", r".*.mlp.*"],
|
||||
gradient_checkpointing=True,
|
||||
from transformers import TrainingArguments، AutoConfig، AutoTokenizer، AutoModelForCausalLM
|
||||
|
||||
train_dataset = datasets.load_dataset('imdb'، split='train')
|
||||
|
||||
args = TrainingArguments(
|
||||
output_dir="./test-galore"،
|
||||
max_steps=100،
|
||||
per_device_train_batch_size=2،
|
||||
optim="galore_adamw_layerwise"،
|
||||
optim_target_modules=[r".*.attn.*"، r".*.mlp.*"]
|
||||
)
|
||||
trainer = SFTTrainer(
|
||||
model="google/gemma-2b",
|
||||
args=args,
|
||||
train_dataset=train_dataset,
|
||||
|
||||
model_id = "google/gemma-2b"
|
||||
|
||||
config = AutoConfig.from_pretrained(model_id)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_config(config).to(0)
|
||||
|
||||
trainer = trl.SFTTrainer(
|
||||
model=model،
|
||||
args=args،
|
||||
train_dataset=train_dataset،
|
||||
dataset_text_field='text'،
|
||||
max_seq_length=512،
|
||||
)
|
||||
|
||||
trainer.train()
|
||||
```
|
||||
|
||||
@ -391,21 +436,39 @@ trainer.train()
|
||||
فيما يلي نص برمجي بسيط يوضح كيفية ضبط نموذج [google/gemma-2b](https://huggingface.co/google/gemma-2b) على مجموعة بيانات IMDB في الدقة الكاملة:
|
||||
|
||||
```python
|
||||
import torch
|
||||
import datasets
|
||||
from trl import SFTConfig, SFTTrainer
|
||||
from transformers import TrainingArguments، AutoTokenizer، AutoModelForCausalLM
|
||||
import trl
|
||||
|
||||
train_dataset = datasets.load_dataset('imdb', split='train')
|
||||
args = SFTConfig(
|
||||
output_dir="./test-lomo",
|
||||
max_steps=100,
|
||||
optim="adalomo",
|
||||
gradient_checkpointing=True,
|
||||
train_dataset = datasets.load_dataset('imdb'، split='train')
|
||||
|
||||
args = TrainingArguments(
|
||||
output_dir="./test-lomo"،
|
||||
max_steps=100،
|
||||
per_device_train_batch_size=4،
|
||||
optim="adalomo"،
|
||||
gradient_checkpointing=True،
|
||||
logging_strategy="steps"،
|
||||
logging_steps=1،
|
||||
learning_rate=2e-6،
|
||||
save_strategy="no"،
|
||||
run_name="lomo-imdb"،
|
||||
)
|
||||
trainer = SFTTrainer(
|
||||
model="google/gemma-2b",
|
||||
args=args,
|
||||
train_dataset=train_dataset,
|
||||
|
||||
model_id = "google/gemma-2b"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id، low_cpu_mem_usage=True).to(0)
|
||||
|
||||
trainer = trl.SFTTrainer(
|
||||
model=model،
|
||||
args=args،
|
||||
train_dataset=train_dataset،
|
||||
dataset_text_field='text'،
|
||||
max_seq_length=1024،
|
||||
)
|
||||
|
||||
trainer.train()
|
||||
```
|
||||
|
||||
@ -440,7 +503,7 @@ args = TrainingArguments(
|
||||
# تحميل النموذج والمجزىء اللغوي
|
||||
model_id = "google/gemma-2b"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id).to(0)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True).to(0)
|
||||
|
||||
# تهيئة المدرب
|
||||
trainer = Trainer(
|
||||
@ -461,21 +524,39 @@ trainer.train()
|
||||
|
||||
فيما يلي نص برمجى بسيط لشرح كيفية ضبط [google/gemma-2b](https://huggingface.co/google/gemma-2b) بدقة على مجموعة بيانات IMDB بدقة كاملة:
|
||||
```python
|
||||
import torch
|
||||
import datasets
|
||||
from trl import SFTConfig, SFTTrainer
|
||||
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM
|
||||
import trl
|
||||
|
||||
train_dataset = datasets.load_dataset('imdb', split='train')
|
||||
args = SFTConfig(
|
||||
output_dir="./test-galore",
|
||||
max_steps=100,
|
||||
|
||||
args = TrainingArguments(
|
||||
output_dir="./test-schedulefree",
|
||||
max_steps=1000,
|
||||
per_device_train_batch_size=4,
|
||||
optim="schedule_free_adamw",
|
||||
gradient_checkpointing=True,
|
||||
logging_strategy="steps",
|
||||
logging_steps=1,
|
||||
learning_rate=2e-6,
|
||||
save_strategy="no",
|
||||
run_name="sfo-imdb",
|
||||
)
|
||||
trainer = SFTTrainer(
|
||||
model="google/gemma-2b",
|
||||
|
||||
model_id = "google/gemma-2b"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True).to(0)
|
||||
|
||||
trainer = trl.SFTTrainer(
|
||||
model=model,
|
||||
args=args,
|
||||
train_dataset=train_dataset,
|
||||
dataset_text_field='text',
|
||||
max_seq_length=1024,
|
||||
)
|
||||
|
||||
trainer.train()
|
||||
```
|
||||
## تسريع ومدرب
|
||||
@ -593,7 +674,29 @@ use_cpu: false
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="Tensor Parallelism with PyTorch 2">
|
||||
|
||||
```yml
|
||||
compute_environment: LOCAL_MACHINE
|
||||
tp_config:
|
||||
tp_size: 4
|
||||
distributed_type: TP
|
||||
downcast_bf16: 'no'
|
||||
machine_rank: 0
|
||||
main_training_function: main
|
||||
mixed_precision: 'no'
|
||||
num_machines: 1
|
||||
num_processes: 4
|
||||
rdzv_backend: static
|
||||
same_network: true
|
||||
tpu_env: []
|
||||
tpu_use_cluster: false
|
||||
tpu_use_sudo: false
|
||||
use_cpu: false
|
||||
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
يُعد أمر [`accelerate_launch`](https://huggingface.co/docs/accelerate/package_reference/cli#accelerate-launch) هو الطريقة المُوصى بها لتشغيل نص البرمجى للتدريب على نظام موزع باستخدام Accelerate و [`Trainer`] مع المعلمات المحددة في `config_file.yaml`. يتم حفظ هذا الملف في مجلد ذاكرة التخزين المؤقت لـ Accelerate ويتم تحميله تلقائيًا عند تشغيل `accelerate_launch`.
|
||||
|
||||
|
@ -23,6 +23,8 @@
|
||||
title: Laden und Trainieren von Adaptern mit 🤗 PEFT
|
||||
- local: model_sharing
|
||||
title: Ein Modell teilen
|
||||
- local: transformers_agents
|
||||
title: Agents
|
||||
- local: llm_tutorial
|
||||
title: Generation with LLMs
|
||||
title: Tutorials
|
||||
@ -37,4 +39,4 @@
|
||||
title: Testen
|
||||
- local: pr_checks
|
||||
title: Überprüfung einer Pull Request
|
||||
title: Contribute
|
||||
title: Contribute
|
@ -95,7 +95,7 @@ wie der Code geschrieben werden sollte :-)
|
||||
1. Der Vorwärtsdurchlauf Ihres Modells sollte vollständig in die Modellierungsdatei geschrieben werden und dabei völlig unabhängig von anderen
|
||||
Modellen in der Bibliothek. Wenn Sie einen Block aus einem anderen Modell wiederverwenden möchten, kopieren Sie den Code und fügen ihn mit einem
|
||||
`# Kopiert von` ein (siehe [hier](https://github.com/huggingface/transformers/blob/v4.17.0/src/transformers/models/roberta/modeling_roberta.py#L160)
|
||||
für ein gutes Beispiel und [hier](pr_checks#check-copies) für weitere Dokumentation zu Copied from).
|
||||
für ein gutes Beispiel und [hier](pr_checks#check-copies) für weitere Dokumentation zu Copied from).
|
||||
2. Der Code sollte vollständig verständlich sein, auch für einen Nicht-Muttersprachler. Das heißt, Sie sollten
|
||||
beschreibende Variablennamen wählen und Abkürzungen vermeiden. Ein Beispiel: `activation` ist `act` vorzuziehen.
|
||||
Von Variablennamen mit nur einem Buchstaben wird dringend abgeraten, es sei denn, es handelt sich um einen Index in einer for-Schleife.
|
||||
@ -402,7 +402,7 @@ Andernfalls beginnen wir mit der Erstellung eines neuen Modells. Wir empfehlen d
|
||||
ein bestehendes Modell:
|
||||
|
||||
```bash
|
||||
transformers add-new-model-like
|
||||
transformers-cli add-new-model-like
|
||||
```
|
||||
|
||||
Sie werden mit einem Fragebogen aufgefordert, die grundlegenden Informationen Ihres Modells einzugeben.
|
||||
|
@ -63,7 +63,7 @@ Wenn Sie sich vergewissert haben, dass der Fehler noch nicht gemeldet wurde, geb
|
||||
Um das Betriebssystem und die Softwareversionen automatisch auszugeben, führen Sie den folgenden Befehl aus:
|
||||
|
||||
```bash
|
||||
transformers env
|
||||
transformers-cli env
|
||||
```
|
||||
|
||||
Sie können denselben Befehl auch im Hauptverzeichnis des Repositorys ausführen:
|
||||
|
@ -55,148 +55,148 @@ Die Bibliothek enthält derzeit JAX-, PyTorch- und TensorFlow-Implementierungen,
|
||||
|
||||
<!--This list is updated automatically from the README with _make fix-copies_. Do not update manually! -->
|
||||
|
||||
1. **[ALBERT](model_doc/albert)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://huggingface.co/papers/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
|
||||
1. **[ALIGN](model_doc/align)** (from Google Research) released with the paper [Scaling Up Visual and Vision-Language Representation Learning With Noisy Text Supervision](https://huggingface.co/papers/2102.05918) by Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc V. Le, Yunhsuan Sung, Zhen Li, Tom Duerig.
|
||||
1. **[BART](model_doc/bart)** (from Facebook) released with the paper [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://huggingface.co/papers/1910.13461) by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer.
|
||||
1. **[BARThez](model_doc/barthez)** (from École polytechnique) released with the paper [BARThez: a Skilled Pretrained French Sequence-to-Sequence Model](https://huggingface.co/papers/2010.12321) by Moussa Kamal Eddine, Antoine J.-P. Tixier, Michalis Vazirgiannis.
|
||||
1. **[BARTpho](model_doc/bartpho)** (from VinAI Research) released with the paper [BARTpho: Pre-trained Sequence-to-Sequence Models for Vietnamese](https://huggingface.co/papers/2109.09701) by Nguyen Luong Tran, Duong Minh Le and Dat Quoc Nguyen.
|
||||
1. **[BEiT](model_doc/beit)** (from Microsoft) released with the paper [BEiT: BERT Pre-Training of Image Transformers](https://huggingface.co/papers/2106.08254) by Hangbo Bao, Li Dong, Furu Wei.
|
||||
1. **[BERT](model_doc/bert)** (from Google) released with the paper [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://huggingface.co/papers/1810.04805) by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova.
|
||||
1. **[BERT For Sequence Generation](model_doc/bert-generation)** (from Google) released with the paper [Leveraging Pre-trained Checkpoints for Sequence Generation Tasks](https://huggingface.co/papers/1907.12461) by Sascha Rothe, Shashi Narayan, Aliaksei Severyn.
|
||||
1. **[ALBERT](model_doc/albert)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
|
||||
1. **[ALIGN](model_doc/align)** (from Google Research) released with the paper [Scaling Up Visual and Vision-Language Representation Learning With Noisy Text Supervision](https://arxiv.org/abs/2102.05918) by Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc V. Le, Yunhsuan Sung, Zhen Li, Tom Duerig.
|
||||
1. **[BART](model_doc/bart)** (from Facebook) released with the paper [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://arxiv.org/abs/1910.13461) by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer.
|
||||
1. **[BARThez](model_doc/barthez)** (from École polytechnique) released with the paper [BARThez: a Skilled Pretrained French Sequence-to-Sequence Model](https://arxiv.org/abs/2010.12321) by Moussa Kamal Eddine, Antoine J.-P. Tixier, Michalis Vazirgiannis.
|
||||
1. **[BARTpho](model_doc/bartpho)** (from VinAI Research) released with the paper [BARTpho: Pre-trained Sequence-to-Sequence Models for Vietnamese](https://arxiv.org/abs/2109.09701) by Nguyen Luong Tran, Duong Minh Le and Dat Quoc Nguyen.
|
||||
1. **[BEiT](model_doc/beit)** (from Microsoft) released with the paper [BEiT: BERT Pre-Training of Image Transformers](https://arxiv.org/abs/2106.08254) by Hangbo Bao, Li Dong, Furu Wei.
|
||||
1. **[BERT](model_doc/bert)** (from Google) released with the paper [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova.
|
||||
1. **[BERT For Sequence Generation](model_doc/bert-generation)** (from Google) released with the paper [Leveraging Pre-trained Checkpoints for Sequence Generation Tasks](https://arxiv.org/abs/1907.12461) by Sascha Rothe, Shashi Narayan, Aliaksei Severyn.
|
||||
1. **[BERTweet](model_doc/bertweet)** (from VinAI Research) released with the paper [BERTweet: A pre-trained language model for English Tweets](https://aclanthology.org/2020.emnlp-demos.2/) by Dat Quoc Nguyen, Thanh Vu and Anh Tuan Nguyen.
|
||||
1. **[BigBird-Pegasus](model_doc/bigbird_pegasus)** (from Google Research) released with the paper [Big Bird: Transformers for Longer Sequences](https://huggingface.co/papers/2007.14062) by Manzil Zaheer, Guru Guruganesh, Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, Amr Ahmed.
|
||||
1. **[BigBird-RoBERTa](model_doc/big_bird)** (from Google Research) released with the paper [Big Bird: Transformers for Longer Sequences](https://huggingface.co/papers/2007.14062) by Manzil Zaheer, Guru Guruganesh, Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, Amr Ahmed.
|
||||
1. **[Blenderbot](model_doc/blenderbot)** (from Facebook) released with the paper [Recipes for building an open-domain chatbot](https://huggingface.co/papers/2004.13637) by Stephen Roller, Emily Dinan, Naman Goyal, Da Ju, Mary Williamson, Yinhan Liu, Jing Xu, Myle Ott, Kurt Shuster, Eric M. Smith, Y-Lan Boureau, Jason Weston.
|
||||
1. **[BlenderbotSmall](model_doc/blenderbot-small)** (from Facebook) released with the paper [Recipes for building an open-domain chatbot](https://huggingface.co/papers/2004.13637) by Stephen Roller, Emily Dinan, Naman Goyal, Da Ju, Mary Williamson, Yinhan Liu, Jing Xu, Myle Ott, Kurt Shuster, Eric M. Smith, Y-Lan Boureau, Jason Weston.
|
||||
1. **[BigBird-Pegasus](model_doc/bigbird_pegasus)** (from Google Research) released with the paper [Big Bird: Transformers for Longer Sequences](https://arxiv.org/abs/2007.14062) by Manzil Zaheer, Guru Guruganesh, Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, Amr Ahmed.
|
||||
1. **[BigBird-RoBERTa](model_doc/big_bird)** (from Google Research) released with the paper [Big Bird: Transformers for Longer Sequences](https://arxiv.org/abs/2007.14062) by Manzil Zaheer, Guru Guruganesh, Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, Amr Ahmed.
|
||||
1. **[Blenderbot](model_doc/blenderbot)** (from Facebook) released with the paper [Recipes for building an open-domain chatbot](https://arxiv.org/abs/2004.13637) by Stephen Roller, Emily Dinan, Naman Goyal, Da Ju, Mary Williamson, Yinhan Liu, Jing Xu, Myle Ott, Kurt Shuster, Eric M. Smith, Y-Lan Boureau, Jason Weston.
|
||||
1. **[BlenderbotSmall](model_doc/blenderbot-small)** (from Facebook) released with the paper [Recipes for building an open-domain chatbot](https://arxiv.org/abs/2004.13637) by Stephen Roller, Emily Dinan, Naman Goyal, Da Ju, Mary Williamson, Yinhan Liu, Jing Xu, Myle Ott, Kurt Shuster, Eric M. Smith, Y-Lan Boureau, Jason Weston.
|
||||
1. **[BLOOM](model_doc/bloom)** (from BigScience workshop) released by the [BigScience Workshop](https://bigscience.huggingface.co/).
|
||||
1. **[BORT](model_doc/bort)** (from Alexa) released with the paper [Optimal Subarchitecture Extraction For BERT](https://huggingface.co/papers/2010.10499) by Adrian de Wynter and Daniel J. Perry.
|
||||
1. **[ByT5](model_doc/byt5)** (from Google Research) released with the paper [ByT5: Towards a token-free future with pre-trained byte-to-byte models](https://huggingface.co/papers/2105.13626) by Linting Xue, Aditya Barua, Noah Constant, Rami Al-Rfou, Sharan Narang, Mihir Kale, Adam Roberts, Colin Raffel.
|
||||
1. **[CamemBERT](model_doc/camembert)** (from Inria/Facebook/Sorbonne) released with the paper [CamemBERT: a Tasty French Language Model](https://huggingface.co/papers/1911.03894) by Louis Martin*, Benjamin Muller*, Pedro Javier Ortiz Suárez*, Yoann Dupont, Laurent Romary, Éric Villemonte de la Clergerie, Djamé Seddah and Benoît Sagot.
|
||||
1. **[CANINE](model_doc/canine)** (from Google Research) released with the paper [CANINE: Pre-training an Efficient Tokenization-Free Encoder for Language Representation](https://huggingface.co/papers/2103.06874) by Jonathan H. Clark, Dan Garrette, Iulia Turc, John Wieting.
|
||||
1. **[CLIP](model_doc/clip)** (from OpenAI) released with the paper [Learning Transferable Visual Models From Natural Language Supervision](https://huggingface.co/papers/2103.00020) by Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, Ilya Sutskever.
|
||||
1. **[CodeGen](model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://huggingface.co/papers/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[ConvBERT](model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://huggingface.co/papers/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://huggingface.co/papers/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
1. **[ConvNeXTV2](model_doc/convnextv2)** (from Facebook AI) released with the paper [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](https://huggingface.co/papers/2301.00808) by Sanghyun Woo, Shoubhik Debnath, Ronghang Hu, Xinlei Chen, Zhuang Liu, In So Kweon, Saining Xie.
|
||||
1. **[CPM](model_doc/cpm)** (from Tsinghua University) released with the paper [CPM: A Large-scale Generative Chinese Pre-trained Language Model](https://huggingface.co/papers/2012.00413) by Zhengyan Zhang, Xu Han, Hao Zhou, Pei Ke, Yuxian Gu, Deming Ye, Yujia Qin, Yusheng Su, Haozhe Ji, Jian Guan, Fanchao Qi, Xiaozhi Wang, Yanan Zheng, Guoyang Zeng, Huanqi Cao, Shengqi Chen, Daixuan Li, Zhenbo Sun, Zhiyuan Liu, Minlie Huang, Wentao Han, Jie Tang, Juanzi Li, Xiaoyan Zhu, Maosong Sun.
|
||||
1. **[CTRL](model_doc/ctrl)** (from Salesforce) released with the paper [CTRL: A Conditional Transformer Language Model for Controllable Generation](https://huggingface.co/papers/1909.05858) by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher.
|
||||
1. **[CvT](model_doc/cvt)** (from Microsoft) released with the paper [CvT: Introducing Convolutions to Vision Transformers](https://huggingface.co/papers/2103.15808) by Haiping Wu, Bin Xiao, Noel Codella, Mengchen Liu, Xiyang Dai, Lu Yuan, Lei Zhang.
|
||||
1. **[Data2Vec](model_doc/data2vec)** (from Facebook) released with the paper [Data2Vec: A General Framework for Self-supervised Learning in Speech, Vision and Language](https://huggingface.co/papers/2202.03555) by Alexei Baevski, Wei-Ning Hsu, Qiantong Xu, Arun Babu, Jiatao Gu, Michael Auli.
|
||||
1. **[DeBERTa](model_doc/deberta)** (from Microsoft) released with the paper [DeBERTa: Decoding-enhanced BERT with Disentangled Attention](https://huggingface.co/papers/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
|
||||
1. **[DeBERTa-v2](model_doc/deberta-v2)** (from Microsoft) released with the paper [DeBERTa: Decoding-enhanced BERT with Disentangled Attention](https://huggingface.co/papers/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
|
||||
1. **[Decision Transformer](model_doc/decision_transformer)** (from Berkeley/Facebook/Google) released with the paper [Decision Transformer: Reinforcement Learning via Sequence Modeling](https://huggingface.co/papers/2106.01345) by Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Michael Laskin, Pieter Abbeel, Aravind Srinivas, Igor Mordatch.
|
||||
1. **[DeiT](model_doc/deit)** (from Facebook) released with the paper [Training data-efficient image transformers & distillation through attention](https://huggingface.co/papers/2012.12877) by Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, Hervé Jégou.
|
||||
1. **[DETR](model_doc/detr)** (from Facebook) released with the paper [End-to-End Object Detection with Transformers](https://huggingface.co/papers/2005.12872) by Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, Sergey Zagoruyko.
|
||||
1. **[DialoGPT](model_doc/dialogpt)** (from Microsoft Research) released with the paper [DialoGPT: Large-Scale Generative Pre-training for Conversational Response Generation](https://huggingface.co/papers/1911.00536) by Yizhe Zhang, Siqi Sun, Michel Galley, Yen-Chun Chen, Chris Brockett, Xiang Gao, Jianfeng Gao, Jingjing Liu, Bill Dolan.
|
||||
1. **[DistilBERT](model_doc/distilbert)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://huggingface.co/papers/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers-research-projects/tree/main/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers-research-projects/tree/main/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers-research-projects/tree/main/distillation) and a German version of DistilBERT.
|
||||
1. **[DiT](model_doc/dit)** (from Microsoft Research) released with the paper [DiT: Self-supervised Pre-training for Document Image Transformer](https://huggingface.co/papers/2203.02378) by Junlong Li, Yiheng Xu, Tengchao Lv, Lei Cui, Cha Zhang, Furu Wei.
|
||||
1. **[DPR](model_doc/dpr)** (from Facebook) released with the paper [Dense Passage Retrieval for Open-Domain Question Answering](https://huggingface.co/papers/2004.04906) by Vladimir Karpukhin, Barlas Oğuz, Sewon Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.
|
||||
1. **[DPT](master/model_doc/dpt)** (from Intel Labs) released with the paper [Vision Transformers for Dense Prediction](https://huggingface.co/papers/2103.13413) by René Ranftl, Alexey Bochkovskiy, Vladlen Koltun.
|
||||
1. **[EfficientNet](model_doc/efficientnet)** (from Google Research) released with the paper [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://huggingface.co/papers/1905.11946) by Mingxing Tan and Quoc V. Le.
|
||||
1. **[ELECTRA](model_doc/electra)** (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://huggingface.co/papers/2003.10555) by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning.
|
||||
1. **[EncoderDecoder](model_doc/encoder-decoder)** (from Google Research) released with the paper [Leveraging Pre-trained Checkpoints for Sequence Generation Tasks](https://huggingface.co/papers/1907.12461) by Sascha Rothe, Shashi Narayan, Aliaksei Severyn.
|
||||
1. **[FlauBERT](model_doc/flaubert)** (from CNRS) released with the paper [FlauBERT: Unsupervised Language Model Pre-training for French](https://huggingface.co/papers/1912.05372) by Hang Le, Loïc Vial, Jibril Frej, Vincent Segonne, Maximin Coavoux, Benjamin Lecouteux, Alexandre Allauzen, Benoît Crabbé, Laurent Besacier, Didier Schwab.
|
||||
1. **[FLAVA](model_doc/flava)** (from Facebook AI) released with the paper [FLAVA: A Foundational Language And Vision Alignment Model](https://huggingface.co/papers/2112.04482) by Amanpreet Singh, Ronghang Hu, Vedanuj Goswami, Guillaume Couairon, Wojciech Galuba, Marcus Rohrbach, and Douwe Kiela.
|
||||
1. **[FNet](model_doc/fnet)** (from Google Research) released with the paper [FNet: Mixing Tokens with Fourier Transforms](https://huggingface.co/papers/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.
|
||||
1. **[Funnel Transformer](model_doc/funnel)** (from CMU/Google Brain) released with the paper [Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing](https://huggingface.co/papers/2006.03236) by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le.
|
||||
1. **[GLPN](model_doc/glpn)** (from KAIST) released with the paper [Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth](https://huggingface.co/papers/2201.07436) by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim.
|
||||
1. **[BORT](model_doc/bort)** (from Alexa) released with the paper [Optimal Subarchitecture Extraction For BERT](https://arxiv.org/abs/2010.10499) by Adrian de Wynter and Daniel J. Perry.
|
||||
1. **[ByT5](model_doc/byt5)** (from Google Research) released with the paper [ByT5: Towards a token-free future with pre-trained byte-to-byte models](https://arxiv.org/abs/2105.13626) by Linting Xue, Aditya Barua, Noah Constant, Rami Al-Rfou, Sharan Narang, Mihir Kale, Adam Roberts, Colin Raffel.
|
||||
1. **[CamemBERT](model_doc/camembert)** (from Inria/Facebook/Sorbonne) released with the paper [CamemBERT: a Tasty French Language Model](https://arxiv.org/abs/1911.03894) by Louis Martin*, Benjamin Muller*, Pedro Javier Ortiz Suárez*, Yoann Dupont, Laurent Romary, Éric Villemonte de la Clergerie, Djamé Seddah and Benoît Sagot.
|
||||
1. **[CANINE](model_doc/canine)** (from Google Research) released with the paper [CANINE: Pre-training an Efficient Tokenization-Free Encoder for Language Representation](https://arxiv.org/abs/2103.06874) by Jonathan H. Clark, Dan Garrette, Iulia Turc, John Wieting.
|
||||
1. **[CLIP](model_doc/clip)** (from OpenAI) released with the paper [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) by Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, Ilya Sutskever.
|
||||
1. **[CodeGen](model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[ConvBERT](model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
1. **[ConvNeXTV2](model_doc/convnextv2)** (from Facebook AI) released with the paper [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](https://arxiv.org/abs/2301.00808) by Sanghyun Woo, Shoubhik Debnath, Ronghang Hu, Xinlei Chen, Zhuang Liu, In So Kweon, Saining Xie.
|
||||
1. **[CPM](model_doc/cpm)** (from Tsinghua University) released with the paper [CPM: A Large-scale Generative Chinese Pre-trained Language Model](https://arxiv.org/abs/2012.00413) by Zhengyan Zhang, Xu Han, Hao Zhou, Pei Ke, Yuxian Gu, Deming Ye, Yujia Qin, Yusheng Su, Haozhe Ji, Jian Guan, Fanchao Qi, Xiaozhi Wang, Yanan Zheng, Guoyang Zeng, Huanqi Cao, Shengqi Chen, Daixuan Li, Zhenbo Sun, Zhiyuan Liu, Minlie Huang, Wentao Han, Jie Tang, Juanzi Li, Xiaoyan Zhu, Maosong Sun.
|
||||
1. **[CTRL](model_doc/ctrl)** (from Salesforce) released with the paper [CTRL: A Conditional Transformer Language Model for Controllable Generation](https://arxiv.org/abs/1909.05858) by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher.
|
||||
1. **[CvT](model_doc/cvt)** (from Microsoft) released with the paper [CvT: Introducing Convolutions to Vision Transformers](https://arxiv.org/abs/2103.15808) by Haiping Wu, Bin Xiao, Noel Codella, Mengchen Liu, Xiyang Dai, Lu Yuan, Lei Zhang.
|
||||
1. **[Data2Vec](model_doc/data2vec)** (from Facebook) released with the paper [Data2Vec: A General Framework for Self-supervised Learning in Speech, Vision and Language](https://arxiv.org/abs/2202.03555) by Alexei Baevski, Wei-Ning Hsu, Qiantong Xu, Arun Babu, Jiatao Gu, Michael Auli.
|
||||
1. **[DeBERTa](model_doc/deberta)** (from Microsoft) released with the paper [DeBERTa: Decoding-enhanced BERT with Disentangled Attention](https://arxiv.org/abs/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
|
||||
1. **[DeBERTa-v2](model_doc/deberta-v2)** (from Microsoft) released with the paper [DeBERTa: Decoding-enhanced BERT with Disentangled Attention](https://arxiv.org/abs/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
|
||||
1. **[Decision Transformer](model_doc/decision_transformer)** (from Berkeley/Facebook/Google) released with the paper [Decision Transformer: Reinforcement Learning via Sequence Modeling](https://arxiv.org/abs/2106.01345) by Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Michael Laskin, Pieter Abbeel, Aravind Srinivas, Igor Mordatch.
|
||||
1. **[DeiT](model_doc/deit)** (from Facebook) released with the paper [Training data-efficient image transformers & distillation through attention](https://arxiv.org/abs/2012.12877) by Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, Hervé Jégou.
|
||||
1. **[DETR](model_doc/detr)** (from Facebook) released with the paper [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) by Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, Sergey Zagoruyko.
|
||||
1. **[DialoGPT](model_doc/dialogpt)** (from Microsoft Research) released with the paper [DialoGPT: Large-Scale Generative Pre-training for Conversational Response Generation](https://arxiv.org/abs/1911.00536) by Yizhe Zhang, Siqi Sun, Michel Galley, Yen-Chun Chen, Chris Brockett, Xiang Gao, Jianfeng Gao, Jingjing Liu, Bill Dolan.
|
||||
1. **[DistilBERT](model_doc/distilbert)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers-research-projects/tree/main/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers-research-projects/tree/main/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers-research-projects/tree/main/distillation) and a German version of DistilBERT.
|
||||
1. **[DiT](model_doc/dit)** (from Microsoft Research) released with the paper [DiT: Self-supervised Pre-training for Document Image Transformer](https://arxiv.org/abs/2203.02378) by Junlong Li, Yiheng Xu, Tengchao Lv, Lei Cui, Cha Zhang, Furu Wei.
|
||||
1. **[DPR](model_doc/dpr)** (from Facebook) released with the paper [Dense Passage Retrieval for Open-Domain Question Answering](https://arxiv.org/abs/2004.04906) by Vladimir Karpukhin, Barlas Oğuz, Sewon Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.
|
||||
1. **[DPT](master/model_doc/dpt)** (from Intel Labs) released with the paper [Vision Transformers for Dense Prediction](https://arxiv.org/abs/2103.13413) by René Ranftl, Alexey Bochkovskiy, Vladlen Koltun.
|
||||
1. **[EfficientNet](model_doc/efficientnet)** (from Google Research) released with the paper [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) by Mingxing Tan and Quoc V. Le.
|
||||
1. **[ELECTRA](model_doc/electra)** (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://arxiv.org/abs/2003.10555) by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning.
|
||||
1. **[EncoderDecoder](model_doc/encoder-decoder)** (from Google Research) released with the paper [Leveraging Pre-trained Checkpoints for Sequence Generation Tasks](https://arxiv.org/abs/1907.12461) by Sascha Rothe, Shashi Narayan, Aliaksei Severyn.
|
||||
1. **[FlauBERT](model_doc/flaubert)** (from CNRS) released with the paper [FlauBERT: Unsupervised Language Model Pre-training for French](https://arxiv.org/abs/1912.05372) by Hang Le, Loïc Vial, Jibril Frej, Vincent Segonne, Maximin Coavoux, Benjamin Lecouteux, Alexandre Allauzen, Benoît Crabbé, Laurent Besacier, Didier Schwab.
|
||||
1. **[FLAVA](model_doc/flava)** (from Facebook AI) released with the paper [FLAVA: A Foundational Language And Vision Alignment Model](https://arxiv.org/abs/2112.04482) by Amanpreet Singh, Ronghang Hu, Vedanuj Goswami, Guillaume Couairon, Wojciech Galuba, Marcus Rohrbach, and Douwe Kiela.
|
||||
1. **[FNet](model_doc/fnet)** (from Google Research) released with the paper [FNet: Mixing Tokens with Fourier Transforms](https://arxiv.org/abs/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.
|
||||
1. **[Funnel Transformer](model_doc/funnel)** (from CMU/Google Brain) released with the paper [Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing](https://arxiv.org/abs/2006.03236) by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le.
|
||||
1. **[GLPN](model_doc/glpn)** (from KAIST) released with the paper [Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth](https://arxiv.org/abs/2201.07436) by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim.
|
||||
1. **[GPT](model_doc/openai-gpt)** (from OpenAI) released with the paper [Improving Language Understanding by Generative Pre-Training](https://openai.com/research/language-unsupervised/) by Alec Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever.
|
||||
1. **[GPT Neo](model_doc/gpt_neo)** (from EleutherAI) released in the repository [EleutherAI/gpt-neo](https://github.com/EleutherAI/gpt-neo) by Sid Black, Stella Biderman, Leo Gao, Phil Wang and Connor Leahy.
|
||||
1. **[GPT NeoX](model_doc/gpt_neox)** (from EleutherAI) released with the paper [GPT-NeoX-20B: An Open-Source Autoregressive Language Model](https://huggingface.co/papers/2204.06745) by Sid Black, Stella Biderman, Eric Hallahan, Quentin Anthony, Leo Gao, Laurence Golding, Horace He, Connor Leahy, Kyle McDonell, Jason Phang, Michael Pieler, USVSN Sai Prashanth, Shivanshu Purohit, Laria Reynolds, Jonathan Tow, Ben Wang, Samuel Weinbach
|
||||
1. **[GPT NeoX](model_doc/gpt_neox)** (from EleutherAI) released with the paper [GPT-NeoX-20B: An Open-Source Autoregressive Language Model](https://arxiv.org/abs/2204.06745) by Sid Black, Stella Biderman, Eric Hallahan, Quentin Anthony, Leo Gao, Laurence Golding, Horace He, Connor Leahy, Kyle McDonell, Jason Phang, Michael Pieler, USVSN Sai Prashanth, Shivanshu Purohit, Laria Reynolds, Jonathan Tow, Ben Wang, Samuel Weinbach
|
||||
1. **[GPT-2](model_doc/gpt2)** (from OpenAI) released with the paper [Language Models are Unsupervised Multitask Learners](https://openai.com/research/better-language-models/) by Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei and Ilya Sutskever.
|
||||
1. **[GPT-J](model_doc/gptj)** (from EleutherAI) released in the repository [kingoflolz/mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax/) by Ben Wang and Aran Komatsuzaki.
|
||||
1. **[GPTSAN-japanese](model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto(tanreinama).
|
||||
1. **[GroupViT](model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://huggingface.co/papers/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[Hubert](model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://huggingface.co/papers/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://huggingface.co/papers/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[GroupViT](model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[Hubert](model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[ImageGPT](model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[LayoutLM](model_doc/layoutlm)** (from Microsoft Research Asia) released with the paper [LayoutLM: Pre-training of Text and Layout for Document Image Understanding](https://huggingface.co/papers/1912.13318) by Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou.
|
||||
1. **[LayoutLMv2](model_doc/layoutlmv2)** (from Microsoft Research Asia) released with the paper [LayoutLMv2: Multi-modal Pre-training for Visually-Rich Document Understanding](https://huggingface.co/papers/2012.14740) by Yang Xu, Yiheng Xu, Tengchao Lv, Lei Cui, Furu Wei, Guoxin Wang, Yijuan Lu, Dinei Florencio, Cha Zhang, Wanxiang Che, Min Zhang, Lidong Zhou.
|
||||
1. **[LayoutLMv3](model_doc/layoutlmv3)** (from Microsoft Research Asia) released with the paper [LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking](https://huggingface.co/papers/2204.08387) by Yupan Huang, Tengchao Lv, Lei Cui, Yutong Lu, Furu Wei.
|
||||
1. **[LayoutXLM](model_doc/layoutxlm)** (from Microsoft Research Asia) released with the paper [LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding](https://huggingface.co/papers/2104.08836) by Yiheng Xu, Tengchao Lv, Lei Cui, Guoxin Wang, Yijuan Lu, Dinei Florencio, Cha Zhang, Furu Wei.
|
||||
1. **[LED](model_doc/led)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://huggingface.co/papers/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
|
||||
1. **[LeViT](model_doc/levit)** (from Meta AI) released with the paper [LeViT: A Vision Transformer in ConvNet's Clothing for Faster Inference](https://huggingface.co/papers/2104.01136) by Ben Graham, Alaaeldin El-Nouby, Hugo Touvron, Pierre Stock, Armand Joulin, Hervé Jégou, Matthijs Douze.
|
||||
1. **[Longformer](model_doc/longformer)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://huggingface.co/papers/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
|
||||
1. **[LongT5](model_doc/longt5)** (from Google AI) released with the paper [LongT5: Efficient Text-To-Text Transformer for Long Sequences](https://huggingface.co/papers/2112.07916) by Mandy Guo, Joshua Ainslie, David Uthus, Santiago Ontanon, Jianmo Ni, Yun-Hsuan Sung, Yinfei Yang.
|
||||
1. **[LUKE](model_doc/luke)** (from Studio Ousia) released with the paper [LUKE: Deep Contextualized Entity Representations with Entity-aware Self-attention](https://huggingface.co/papers/2010.01057) by Ikuya Yamada, Akari Asai, Hiroyuki Shindo, Hideaki Takeda, Yuji Matsumoto.
|
||||
1. **[LXMERT](model_doc/lxmert)** (from UNC Chapel Hill) released with the paper [LXMERT: Learning Cross-Modality Encoder Representations from Transformers for Open-Domain Question Answering](https://huggingface.co/papers/1908.07490) by Hao Tan and Mohit Bansal.
|
||||
1. **[M-CTC-T](model_doc/mctct)** (from Facebook) released with the paper [Pseudo-Labeling For Massively Multilingual Speech Recognition](https://huggingface.co/papers/2111.00161) by Loren Lugosch, Tatiana Likhomanenko, Gabriel Synnaeve, and Ronan Collobert.
|
||||
1. **[M2M100](model_doc/m2m_100)** (from Facebook) released with the paper [Beyond English-Centric Multilingual Machine Translation](https://huggingface.co/papers/2010.11125) by Angela Fan, Shruti Bhosale, Holger Schwenk, Zhiyi Ma, Ahmed El-Kishky, Siddharth Goyal, Mandeep Baines, Onur Celebi, Guillaume Wenzek, Vishrav Chaudhary, Naman Goyal, Tom Birch, Vitaliy Liptchinsky, Sergey Edunov, Edouard Grave, Michael Auli, Armand Joulin.
|
||||
1. **[LayoutLM](model_doc/layoutlm)** (from Microsoft Research Asia) released with the paper [LayoutLM: Pre-training of Text and Layout for Document Image Understanding](https://arxiv.org/abs/1912.13318) by Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou.
|
||||
1. **[LayoutLMv2](model_doc/layoutlmv2)** (from Microsoft Research Asia) released with the paper [LayoutLMv2: Multi-modal Pre-training for Visually-Rich Document Understanding](https://arxiv.org/abs/2012.14740) by Yang Xu, Yiheng Xu, Tengchao Lv, Lei Cui, Furu Wei, Guoxin Wang, Yijuan Lu, Dinei Florencio, Cha Zhang, Wanxiang Che, Min Zhang, Lidong Zhou.
|
||||
1. **[LayoutLMv3](model_doc/layoutlmv3)** (from Microsoft Research Asia) released with the paper [LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking](https://arxiv.org/abs/2204.08387) by Yupan Huang, Tengchao Lv, Lei Cui, Yutong Lu, Furu Wei.
|
||||
1. **[LayoutXLM](model_doc/layoutxlm)** (from Microsoft Research Asia) released with the paper [LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding](https://arxiv.org/abs/2104.08836) by Yiheng Xu, Tengchao Lv, Lei Cui, Guoxin Wang, Yijuan Lu, Dinei Florencio, Cha Zhang, Furu Wei.
|
||||
1. **[LED](model_doc/led)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
|
||||
1. **[LeViT](model_doc/levit)** (from Meta AI) released with the paper [LeViT: A Vision Transformer in ConvNet's Clothing for Faster Inference](https://arxiv.org/abs/2104.01136) by Ben Graham, Alaaeldin El-Nouby, Hugo Touvron, Pierre Stock, Armand Joulin, Hervé Jégou, Matthijs Douze.
|
||||
1. **[Longformer](model_doc/longformer)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
|
||||
1. **[LongT5](model_doc/longt5)** (from Google AI) released with the paper [LongT5: Efficient Text-To-Text Transformer for Long Sequences](https://arxiv.org/abs/2112.07916) by Mandy Guo, Joshua Ainslie, David Uthus, Santiago Ontanon, Jianmo Ni, Yun-Hsuan Sung, Yinfei Yang.
|
||||
1. **[LUKE](model_doc/luke)** (from Studio Ousia) released with the paper [LUKE: Deep Contextualized Entity Representations with Entity-aware Self-attention](https://arxiv.org/abs/2010.01057) by Ikuya Yamada, Akari Asai, Hiroyuki Shindo, Hideaki Takeda, Yuji Matsumoto.
|
||||
1. **[LXMERT](model_doc/lxmert)** (from UNC Chapel Hill) released with the paper [LXMERT: Learning Cross-Modality Encoder Representations from Transformers for Open-Domain Question Answering](https://arxiv.org/abs/1908.07490) by Hao Tan and Mohit Bansal.
|
||||
1. **[M-CTC-T](model_doc/mctct)** (from Facebook) released with the paper [Pseudo-Labeling For Massively Multilingual Speech Recognition](https://arxiv.org/abs/2111.00161) by Loren Lugosch, Tatiana Likhomanenko, Gabriel Synnaeve, and Ronan Collobert.
|
||||
1. **[M2M100](model_doc/m2m_100)** (from Facebook) released with the paper [Beyond English-Centric Multilingual Machine Translation](https://arxiv.org/abs/2010.11125) by Angela Fan, Shruti Bhosale, Holger Schwenk, Zhiyi Ma, Ahmed El-Kishky, Siddharth Goyal, Mandeep Baines, Onur Celebi, Guillaume Wenzek, Vishrav Chaudhary, Naman Goyal, Tom Birch, Vitaliy Liptchinsky, Sergey Edunov, Edouard Grave, Michael Auli, Armand Joulin.
|
||||
1. **[MarianMT](model_doc/marian)** Machine translation models trained using [OPUS](http://opus.nlpl.eu/) data by Jörg Tiedemann. The [Marian Framework](https://marian-nmt.github.io/) is being developed by the Microsoft Translator Team.
|
||||
1. **[Mask2Former](model_doc/mask2former)** (from FAIR and UIUC) released with the paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://huggingface.co/papers/2112.01527) by Bowen Cheng, Ishan Misra, Alexander G. Schwing, Alexander Kirillov, Rohit Girdhar.
|
||||
1. **[MaskFormer](model_doc/maskformer)** (from Meta and UIUC) released with the paper [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://huggingface.co/papers/2107.06278) by Bowen Cheng, Alexander G. Schwing, Alexander Kirillov.
|
||||
1. **[mBART](model_doc/mbart)** (from Facebook) released with the paper [Multilingual Denoising Pre-training for Neural Machine Translation](https://huggingface.co/papers/2001.08210) by Yinhan Liu, Jiatao Gu, Naman Goyal, Xian Li, Sergey Edunov, Marjan Ghazvininejad, Mike Lewis, Luke Zettlemoyer.
|
||||
1. **[mBART-50](model_doc/mbart)** (from Facebook) released with the paper [Multilingual Translation with Extensible Multilingual Pretraining and Finetuning](https://huggingface.co/papers/2008.00401) by Yuqing Tang, Chau Tran, Xian Li, Peng-Jen Chen, Naman Goyal, Vishrav Chaudhary, Jiatao Gu, Angela Fan.
|
||||
1. **[Megatron-BERT](model_doc/megatron-bert)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://huggingface.co/papers/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
|
||||
1. **[Megatron-GPT2](model_doc/megatron_gpt2)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://huggingface.co/papers/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
|
||||
1. **[mLUKE](model_doc/mluke)** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models](https://huggingface.co/papers/2110.08151) by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
|
||||
1. **[MobileBERT](model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://huggingface.co/papers/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
|
||||
1. **[MobileViT](model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://huggingface.co/papers/2110.02178) by Sachin Mehta and Mohammad Rastegari.
|
||||
1. **[MPNet](model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://huggingface.co/papers/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
|
||||
1. **[MT5](model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://huggingface.co/papers/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
|
||||
1. **[MVP](model_doc/mvp)** (from RUC AI Box) released with the paper [MVP: Multi-task Supervised Pre-training for Natural Language Generation](https://huggingface.co/papers/2206.12131) by Tianyi Tang, Junyi Li, Wayne Xin Zhao and Ji-Rong Wen.
|
||||
1. **[Nezha](model_doc/nezha)** (from Huawei Noah’s Ark Lab) released with the paper [NEZHA: Neural Contextualized Representation for Chinese Language Understanding](https://huggingface.co/papers/1909.00204) by Junqiu Wei, Xiaozhe Ren, Xiaoguang Li, Wenyong Huang, Yi Liao, Yasheng Wang, Jiashu Lin, Xin Jiang, Xiao Chen and Qun Liu.
|
||||
1. **[NLLB](model_doc/nllb)** (from Meta) released with the paper [No Language Left Behind: Scaling Human-Centered Machine Translation](https://huggingface.co/papers/2207.04672) by the NLLB team.
|
||||
1. **[Nyströmformer](model_doc/nystromformer)** (from the University of Wisconsin - Madison) released with the paper [Nyströmformer: A Nyström-Based Algorithm for Approximating Self-Attention](https://huggingface.co/papers/2102.03902) by Yunyang Xiong, Zhanpeng Zeng, Rudrasis Chakraborty, Mingxing Tan, Glenn Fung, Yin Li, Vikas Singh.
|
||||
1. **[OneFormer](model_doc/oneformer)** (from SHI Labs) released with the paper [OneFormer: One Transformer to Rule Universal Image Segmentation](https://huggingface.co/papers/2211.06220) by Jitesh Jain, Jiachen Li, MangTik Chiu, Ali Hassani, Nikita Orlov, Humphrey Shi.
|
||||
1. **[OPT](master/model_doc/opt)** (from Meta AI) released with the paper [OPT: Open Pre-trained Transformer Language Models](https://huggingface.co/papers/2205.01068) by Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen et al.
|
||||
1. **[OWL-ViT](model_doc/owlvit)** (from Google AI) released with the paper [Simple Open-Vocabulary Object Detection with Vision Transformers](https://huggingface.co/papers/2205.06230) by Matthias Minderer, Alexey Gritsenko, Austin Stone, Maxim Neumann, Dirk Weissenborn, Alexey Dosovitskiy, Aravindh Mahendran, Anurag Arnab, Mostafa Dehghani, Zhuoran Shen, Xiao Wang, Xiaohua Zhai, Thomas Kipf, and Neil Houlsby.
|
||||
1. **[Pegasus](model_doc/pegasus)** (from Google) released with the paper [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://huggingface.co/papers/1912.08777) by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu.
|
||||
1. **[Perceiver IO](model_doc/perceiver)** (from Deepmind) released with the paper [Perceiver IO: A General Architecture for Structured Inputs & Outputs](https://huggingface.co/papers/2107.14795) by Andrew Jaegle, Sebastian Borgeaud, Jean-Baptiste Alayrac, Carl Doersch, Catalin Ionescu, David Ding, Skanda Koppula, Daniel Zoran, Andrew Brock, Evan Shelhamer, Olivier Hénaff, Matthew M. Botvinick, Andrew Zisserman, Oriol Vinyals, João Carreira.
|
||||
1. **[Mask2Former](model_doc/mask2former)** (from FAIR and UIUC) released with the paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) by Bowen Cheng, Ishan Misra, Alexander G. Schwing, Alexander Kirillov, Rohit Girdhar.
|
||||
1. **[MaskFormer](model_doc/maskformer)** (from Meta and UIUC) released with the paper [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278) by Bowen Cheng, Alexander G. Schwing, Alexander Kirillov.
|
||||
1. **[mBART](model_doc/mbart)** (from Facebook) released with the paper [Multilingual Denoising Pre-training for Neural Machine Translation](https://arxiv.org/abs/2001.08210) by Yinhan Liu, Jiatao Gu, Naman Goyal, Xian Li, Sergey Edunov, Marjan Ghazvininejad, Mike Lewis, Luke Zettlemoyer.
|
||||
1. **[mBART-50](model_doc/mbart)** (from Facebook) released with the paper [Multilingual Translation with Extensible Multilingual Pretraining and Finetuning](https://arxiv.org/abs/2008.00401) by Yuqing Tang, Chau Tran, Xian Li, Peng-Jen Chen, Naman Goyal, Vishrav Chaudhary, Jiatao Gu, Angela Fan.
|
||||
1. **[Megatron-BERT](model_doc/megatron-bert)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
|
||||
1. **[Megatron-GPT2](model_doc/megatron_gpt2)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
|
||||
1. **[mLUKE](model_doc/mluke)** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models](https://arxiv.org/abs/2110.08151) by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
|
||||
1. **[MobileBERT](model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
|
||||
1. **[MobileViT](model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari.
|
||||
1. **[MPNet](model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
|
||||
1. **[MT5](model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
|
||||
1. **[MVP](model_doc/mvp)** (from RUC AI Box) released with the paper [MVP: Multi-task Supervised Pre-training for Natural Language Generation](https://arxiv.org/abs/2206.12131) by Tianyi Tang, Junyi Li, Wayne Xin Zhao and Ji-Rong Wen.
|
||||
1. **[Nezha](model_doc/nezha)** (from Huawei Noah’s Ark Lab) released with the paper [NEZHA: Neural Contextualized Representation for Chinese Language Understanding](https://arxiv.org/abs/1909.00204) by Junqiu Wei, Xiaozhe Ren, Xiaoguang Li, Wenyong Huang, Yi Liao, Yasheng Wang, Jiashu Lin, Xin Jiang, Xiao Chen and Qun Liu.
|
||||
1. **[NLLB](model_doc/nllb)** (from Meta) released with the paper [No Language Left Behind: Scaling Human-Centered Machine Translation](https://arxiv.org/abs/2207.04672) by the NLLB team.
|
||||
1. **[Nyströmformer](model_doc/nystromformer)** (from the University of Wisconsin - Madison) released with the paper [Nyströmformer: A Nyström-Based Algorithm for Approximating Self-Attention](https://arxiv.org/abs/2102.03902) by Yunyang Xiong, Zhanpeng Zeng, Rudrasis Chakraborty, Mingxing Tan, Glenn Fung, Yin Li, Vikas Singh.
|
||||
1. **[OneFormer](model_doc/oneformer)** (from SHI Labs) released with the paper [OneFormer: One Transformer to Rule Universal Image Segmentation](https://arxiv.org/abs/2211.06220) by Jitesh Jain, Jiachen Li, MangTik Chiu, Ali Hassani, Nikita Orlov, Humphrey Shi.
|
||||
1. **[OPT](master/model_doc/opt)** (from Meta AI) released with the paper [OPT: Open Pre-trained Transformer Language Models](https://arxiv.org/abs/2205.01068) by Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen et al.
|
||||
1. **[OWL-ViT](model_doc/owlvit)** (from Google AI) released with the paper [Simple Open-Vocabulary Object Detection with Vision Transformers](https://arxiv.org/abs/2205.06230) by Matthias Minderer, Alexey Gritsenko, Austin Stone, Maxim Neumann, Dirk Weissenborn, Alexey Dosovitskiy, Aravindh Mahendran, Anurag Arnab, Mostafa Dehghani, Zhuoran Shen, Xiao Wang, Xiaohua Zhai, Thomas Kipf, and Neil Houlsby.
|
||||
1. **[Pegasus](model_doc/pegasus)** (from Google) released with the paper [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://arxiv.org/abs/1912.08777) by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu.
|
||||
1. **[Perceiver IO](model_doc/perceiver)** (from Deepmind) released with the paper [Perceiver IO: A General Architecture for Structured Inputs & Outputs](https://arxiv.org/abs/2107.14795) by Andrew Jaegle, Sebastian Borgeaud, Jean-Baptiste Alayrac, Carl Doersch, Catalin Ionescu, David Ding, Skanda Koppula, Daniel Zoran, Andrew Brock, Evan Shelhamer, Olivier Hénaff, Matthew M. Botvinick, Andrew Zisserman, Oriol Vinyals, João Carreira.
|
||||
1. **[PhoBERT](model_doc/phobert)** (from VinAI Research) released with the paper [PhoBERT: Pre-trained language models for Vietnamese](https://www.aclweb.org/anthology/2020.findings-emnlp.92/) by Dat Quoc Nguyen and Anh Tuan Nguyen.
|
||||
1. **[PLBart](model_doc/plbart)** (from UCLA NLP) released with the paper [Unified Pre-training for Program Understanding and Generation](https://huggingface.co/papers/2103.06333) by Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, Kai-Wei Chang.
|
||||
1. **[PoolFormer](model_doc/poolformer)** (from Sea AI Labs) released with the paper [MetaFormer is Actually What You Need for Vision](https://huggingface.co/papers/2111.11418) by Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng.
|
||||
1. **[ProphetNet](model_doc/prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://huggingface.co/papers/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
|
||||
1. **[QDQBert](model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://huggingface.co/papers/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
|
||||
1. **[RAG](model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://huggingface.co/papers/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://huggingface.co/papers/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[Reformer](model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://huggingface.co/papers/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](model_doc/regnet)** (from META Platforms) released with the paper [Designing Network Design Space](https://huggingface.co/papers/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://huggingface.co/papers/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
1. **[ResNet](model_doc/resnet)** (from Microsoft Research) released with the paper [Deep Residual Learning for Image Recognition](https://huggingface.co/papers/1512.03385) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
|
||||
1. **[RoBERTa](model_doc/roberta)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://huggingface.co/papers/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.
|
||||
1. **[RoFormer](model_doc/roformer)** (from ZhuiyiTechnology), released together with the paper [RoFormer: Enhanced Transformer with Rotary Position Embedding](https://huggingface.co/papers/2104.09864) by Jianlin Su and Yu Lu and Shengfeng Pan and Bo Wen and Yunfeng Liu.
|
||||
1. **[SegFormer](model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo.
|
||||
1. **[SEW](model_doc/sew)** (from ASAPP) released with the paper [Performance-Efficiency Trade-offs in Unsupervised Pre-training for Speech Recognition](https://huggingface.co/papers/2109.06870) by Felix Wu, Kwangyoun Kim, Jing Pan, Kyu Han, Kilian Q. Weinberger, Yoav Artzi.
|
||||
1. **[SEW-D](model_doc/sew_d)** (from ASAPP) released with the paper [Performance-Efficiency Trade-offs in Unsupervised Pre-training for Speech Recognition](https://huggingface.co/papers/2109.06870) by Felix Wu, Kwangyoun Kim, Jing Pan, Kyu Han, Kilian Q. Weinberger, Yoav Artzi.
|
||||
1. **[SpeechToTextTransformer](model_doc/speech_to_text)** (from Facebook), released together with the paper [fairseq S2T: Fast Speech-to-Text Modeling with fairseq](https://huggingface.co/papers/2010.05171) by Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Dmytro Okhonko, Juan Pino.
|
||||
1. **[SpeechToTextTransformer2](model_doc/speech_to_text_2)** (from Facebook), released together with the paper [Large-Scale Self- and Semi-Supervised Learning for Speech Translation](https://huggingface.co/papers/2104.06678) by Changhan Wang, Anne Wu, Juan Pino, Alexei Baevski, Michael Auli, Alexis Conneau.
|
||||
1. **[Splinter](model_doc/splinter)** (from Tel Aviv University), released together with the paper [Few-Shot Question Answering by Pretraining Span Selection](https://huggingface.co/papers/2101.00438) by Ori Ram, Yuval Kirstain, Jonathan Berant, Amir Globerson, Omer Levy.
|
||||
1. **[SqueezeBERT](model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://huggingface.co/papers/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
|
||||
1. **[Swin Transformer](model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://huggingface.co/papers/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo.
|
||||
1. **[Swin Transformer V2](model_doc/swinv2)** (from Microsoft) released with the paper [Swin Transformer V2: Scaling Up Capacity and Resolution](https://huggingface.co/papers/2111.09883) by Ze Liu, Han Hu, Yutong Lin, Zhuliang Yao, Zhenda Xie, Yixuan Wei, Jia Ning, Yue Cao, Zheng Zhang, Li Dong, Furu Wei, Baining Guo.
|
||||
1. **[T5](model_doc/t5)** (from Google AI) released with the paper [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://huggingface.co/papers/1910.10683) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu.
|
||||
1. **[PLBart](model_doc/plbart)** (from UCLA NLP) released with the paper [Unified Pre-training for Program Understanding and Generation](https://arxiv.org/abs/2103.06333) by Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, Kai-Wei Chang.
|
||||
1. **[PoolFormer](model_doc/poolformer)** (from Sea AI Labs) released with the paper [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) by Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng.
|
||||
1. **[ProphetNet](model_doc/prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
|
||||
1. **[QDQBert](model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
|
||||
1. **[RAG](model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[Reformer](model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](model_doc/regnet)** (from META Platforms) released with the paper [Designing Network Design Space](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
1. **[ResNet](model_doc/resnet)** (from Microsoft Research) released with the paper [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
|
||||
1. **[RoBERTa](model_doc/roberta)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://arxiv.org/abs/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.
|
||||
1. **[RoFormer](model_doc/roformer)** (from ZhuiyiTechnology), released together with the paper [RoFormer: Enhanced Transformer with Rotary Position Embedding](https://arxiv.org/abs/2104.09864) by Jianlin Su and Yu Lu and Shengfeng Pan and Bo Wen and Yunfeng Liu.
|
||||
1. **[SegFormer](model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo.
|
||||
1. **[SEW](model_doc/sew)** (from ASAPP) released with the paper [Performance-Efficiency Trade-offs in Unsupervised Pre-training for Speech Recognition](https://arxiv.org/abs/2109.06870) by Felix Wu, Kwangyoun Kim, Jing Pan, Kyu Han, Kilian Q. Weinberger, Yoav Artzi.
|
||||
1. **[SEW-D](model_doc/sew_d)** (from ASAPP) released with the paper [Performance-Efficiency Trade-offs in Unsupervised Pre-training for Speech Recognition](https://arxiv.org/abs/2109.06870) by Felix Wu, Kwangyoun Kim, Jing Pan, Kyu Han, Kilian Q. Weinberger, Yoav Artzi.
|
||||
1. **[SpeechToTextTransformer](model_doc/speech_to_text)** (from Facebook), released together with the paper [fairseq S2T: Fast Speech-to-Text Modeling with fairseq](https://arxiv.org/abs/2010.05171) by Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Dmytro Okhonko, Juan Pino.
|
||||
1. **[SpeechToTextTransformer2](model_doc/speech_to_text_2)** (from Facebook), released together with the paper [Large-Scale Self- and Semi-Supervised Learning for Speech Translation](https://arxiv.org/abs/2104.06678) by Changhan Wang, Anne Wu, Juan Pino, Alexei Baevski, Michael Auli, Alexis Conneau.
|
||||
1. **[Splinter](model_doc/splinter)** (from Tel Aviv University), released together with the paper [Few-Shot Question Answering by Pretraining Span Selection](https://arxiv.org/abs/2101.00438) by Ori Ram, Yuval Kirstain, Jonathan Berant, Amir Globerson, Omer Levy.
|
||||
1. **[SqueezeBERT](model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
|
||||
1. **[Swin Transformer](model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo.
|
||||
1. **[Swin Transformer V2](model_doc/swinv2)** (from Microsoft) released with the paper [Swin Transformer V2: Scaling Up Capacity and Resolution](https://arxiv.org/abs/2111.09883) by Ze Liu, Han Hu, Yutong Lin, Zhuliang Yao, Zhenda Xie, Yixuan Wei, Jia Ning, Yue Cao, Zheng Zhang, Li Dong, Furu Wei, Baining Guo.
|
||||
1. **[T5](model_doc/t5)** (from Google AI) released with the paper [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/abs/1910.10683) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu.
|
||||
1. **[T5v1.1](model_doc/t5v1.1)** (from Google AI) released in the repository [google-research/text-to-text-transfer-transformer](https://github.com/google-research/text-to-text-transfer-transformer/blob/main/released_checkpoints.md#t511) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu.
|
||||
1. **[TAPAS](model_doc/tapas)** (from Google AI) released with the paper [TAPAS: Weakly Supervised Table Parsing via Pre-training](https://huggingface.co/papers/2004.02349) by Jonathan Herzig, Paweł Krzysztof Nowak, Thomas Müller, Francesco Piccinno and Julian Martin Eisenschlos.
|
||||
1. **[TAPEX](model_doc/tapex)** (from Microsoft Research) released with the paper [TAPEX: Table Pre-training via Learning a Neural SQL Executor](https://huggingface.co/papers/2107.07653) by Qian Liu, Bei Chen, Jiaqi Guo, Morteza Ziyadi, Zeqi Lin, Weizhu Chen, Jian-Guang Lou.
|
||||
1. **[Trajectory Transformer](model_doc/trajectory_transformers)** (from the University of California at Berkeley) released with the paper [Offline Reinforcement Learning as One Big Sequence Modeling Problem](https://huggingface.co/papers/2106.02039) by Michael Janner, Qiyang Li, Sergey Levine
|
||||
1. **[Transformer-XL](model_doc/transfo-xl)** (from Google/CMU) released with the paper [Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context](https://huggingface.co/papers/1901.02860) by Zihang Dai*, Zhilin Yang*, Yiming Yang, Jaime Carbonell, Quoc V. Le, Ruslan Salakhutdinov.
|
||||
1. **[TrOCR](model_doc/trocr)** (from Microsoft), released together with the paper [TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models](https://huggingface.co/papers/2109.10282) by Minghao Li, Tengchao Lv, Lei Cui, Yijuan Lu, Dinei Florencio, Cha Zhang, Zhoujun Li, Furu Wei.
|
||||
1. **[UL2](model_doc/ul2)** (from Google Research) released with the paper [Unifying Language Learning Paradigms](https://huggingface.co/papers/2205.05131v1) by Yi Tay, Mostafa Dehghani, Vinh Q. Tran, Xavier Garcia, Dara Bahri, Tal Schuster, Huaixiu Steven Zheng, Neil Houlsby, Donald Metzler
|
||||
1. **[TAPAS](model_doc/tapas)** (from Google AI) released with the paper [TAPAS: Weakly Supervised Table Parsing via Pre-training](https://arxiv.org/abs/2004.02349) by Jonathan Herzig, Paweł Krzysztof Nowak, Thomas Müller, Francesco Piccinno and Julian Martin Eisenschlos.
|
||||
1. **[TAPEX](model_doc/tapex)** (from Microsoft Research) released with the paper [TAPEX: Table Pre-training via Learning a Neural SQL Executor](https://arxiv.org/abs/2107.07653) by Qian Liu, Bei Chen, Jiaqi Guo, Morteza Ziyadi, Zeqi Lin, Weizhu Chen, Jian-Guang Lou.
|
||||
1. **[Trajectory Transformer](model_doc/trajectory_transformers)** (from the University of California at Berkeley) released with the paper [Offline Reinforcement Learning as One Big Sequence Modeling Problem](https://arxiv.org/abs/2106.02039) by Michael Janner, Qiyang Li, Sergey Levine
|
||||
1. **[Transformer-XL](model_doc/transfo-xl)** (from Google/CMU) released with the paper [Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context](https://arxiv.org/abs/1901.02860) by Zihang Dai*, Zhilin Yang*, Yiming Yang, Jaime Carbonell, Quoc V. Le, Ruslan Salakhutdinov.
|
||||
1. **[TrOCR](model_doc/trocr)** (from Microsoft), released together with the paper [TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models](https://arxiv.org/abs/2109.10282) by Minghao Li, Tengchao Lv, Lei Cui, Yijuan Lu, Dinei Florencio, Cha Zhang, Zhoujun Li, Furu Wei.
|
||||
1. **[UL2](model_doc/ul2)** (from Google Research) released with the paper [Unifying Language Learning Paradigms](https://arxiv.org/abs/2205.05131v1) by Yi Tay, Mostafa Dehghani, Vinh Q. Tran, Xavier Garcia, Dara Bahri, Tal Schuster, Huaixiu Steven Zheng, Neil Houlsby, Donald Metzler
|
||||
1. **[UMT5](model_doc/umt5)** (from Google Research) released with the paper [UniMax: Fairer and More Effective Language Sampling for Large-Scale Multilingual Pretraining](https://openreview.net/forum?id=kXwdL1cWOAi) by Hyung Won Chung, Xavier Garcia, Adam Roberts, Yi Tay, Orhan Firat, Sharan Narang, Noah Constant.
|
||||
1. **[UniSpeech](model_doc/unispeech)** (from Microsoft Research) released with the paper [UniSpeech: Unified Speech Representation Learning with Labeled and Unlabeled Data](https://huggingface.co/papers/2101.07597) by Chengyi Wang, Yu Wu, Yao Qian, Kenichi Kumatani, Shujie Liu, Furu Wei, Michael Zeng, Xuedong Huang.
|
||||
1. **[UniSpeechSat](model_doc/unispeech-sat)** (from Microsoft Research) released with the paper [UNISPEECH-SAT: UNIVERSAL SPEECH REPRESENTATION LEARNING WITH SPEAKER AWARE PRE-TRAINING](https://huggingface.co/papers/2110.05752) by Sanyuan Chen, Yu Wu, Chengyi Wang, Zhengyang Chen, Zhuo Chen, Shujie Liu, Jian Wu, Yao Qian, Furu Wei, Jinyu Li, Xiangzhan Yu.
|
||||
1. **[VAN](model_doc/van)** (from Tsinghua University and Nankai University) released with the paper [Visual Attention Network](https://huggingface.co/papers/2202.09741) by Meng-Hao Guo, Cheng-Ze Lu, Zheng-Ning Liu, Ming-Ming Cheng, Shi-Min Hu.
|
||||
1. **[VideoMAE](model_doc/videomae)** (from Multimedia Computing Group, Nanjing University) released with the paper [VideoMAE: Masked Autoencoders are Data-Efficient Learners for Self-Supervised Video Pre-Training](https://huggingface.co/papers/2203.12602) by Zhan Tong, Yibing Song, Jue Wang, Limin Wang.
|
||||
1. **[ViLT](model_doc/vilt)** (from NAVER AI Lab/Kakao Enterprise/Kakao Brain) released with the paper [ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision](https://huggingface.co/papers/2102.03334) by Wonjae Kim, Bokyung Son, Ildoo Kim.
|
||||
1. **[Vision Transformer (ViT)](model_doc/vit)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://huggingface.co/papers/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby.
|
||||
1. **[VisualBERT](model_doc/visual_bert)** (from UCLA NLP) released with the paper [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://huggingface.co/papers/1908.03557) by Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang.
|
||||
1. **[ViTMAE](model_doc/vit_mae)** (from Meta AI) released with the paper [Masked Autoencoders Are Scalable Vision Learners](https://huggingface.co/papers/2111.06377) by Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick.
|
||||
1. **[Wav2Vec2](model_doc/wav2vec2)** (from Facebook AI) released with the paper [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://huggingface.co/papers/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli.
|
||||
1. **[Wav2Vec2-Conformer](model_doc/wav2vec2-conformer)** (from Facebook AI) released with the paper [FAIRSEQ S2T: Fast Speech-to-Text Modeling with FAIRSEQ](https://huggingface.co/papers/2010.05171) by Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Sravya Popuri, Dmytro Okhonko, Juan Pino.
|
||||
1. **[Wav2Vec2Phoneme](model_doc/wav2vec2_phoneme)** (from Facebook AI) released with the paper [Simple and Effective Zero-shot Cross-lingual Phoneme Recognition](https://huggingface.co/papers/2109.11680) by Qiantong Xu, Alexei Baevski, Michael Auli.
|
||||
1. **[WavLM](model_doc/wavlm)** (from Microsoft Research) released with the paper [WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing](https://huggingface.co/papers/2110.13900) by Sanyuan Chen, Chengyi Wang, Zhengyang Chen, Yu Wu, Shujie Liu, Zhuo Chen, Jinyu Li, Naoyuki Kanda, Takuya Yoshioka, Xiong Xiao, Jian Wu, Long Zhou, Shuo Ren, Yanmin Qian, Yao Qian, Jian Wu, Michael Zeng, Furu Wei.
|
||||
1. **[XGLM](model_doc/xglm)** (From Facebook AI) released with the paper [Few-shot Learning with Multilingual Language Models](https://huggingface.co/papers/2112.10668) by Xi Victoria Lin, Todor Mihaylov, Mikel Artetxe, Tianlu Wang, Shuohui Chen, Daniel Simig, Myle Ott, Naman Goyal, Shruti Bhosale, Jingfei Du, Ramakanth Pasunuru, Sam Shleifer, Punit Singh Koura, Vishrav Chaudhary, Brian O'Horo, Jeff Wang, Luke Zettlemoyer, Zornitsa Kozareva, Mona Diab, Veselin Stoyanov, Xian Li.
|
||||
1. **[XLM](model_doc/xlm)** (from Facebook) released together with the paper [Cross-lingual Language Model Pretraining](https://huggingface.co/papers/1901.07291) by Guillaume Lample and Alexis Conneau.
|
||||
1. **[XLM-ProphetNet](model_doc/xlm-prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://huggingface.co/papers/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
|
||||
1. **[XLM-RoBERTa](model_doc/xlm-roberta)** (from Facebook AI), released together with the paper [Unsupervised Cross-lingual Representation Learning at Scale](https://huggingface.co/papers/1911.02116) by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov.
|
||||
1. **[XLM-RoBERTa-XL](model_doc/xlm-roberta-xl)** (from Facebook AI), released together with the paper [Larger-Scale Transformers for Multilingual Masked Language Modeling](https://huggingface.co/papers/2105.00572) by Naman Goyal, Jingfei Du, Myle Ott, Giri Anantharaman, Alexis Conneau.
|
||||
1. **[XLM-V](model_doc/xlm-v)** (from Meta AI) released with the paper [XLM-V: Overcoming the Vocabulary Bottleneck in Multilingual Masked Language Models](https://huggingface.co/papers/2301.10472) by Davis Liang, Hila Gonen, Yuning Mao, Rui Hou, Naman Goyal, Marjan Ghazvininejad, Luke Zettlemoyer, Madian Khabsa.
|
||||
1. **[XLNet](model_doc/xlnet)** (from Google/CMU) released with the paper [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://huggingface.co/papers/1906.08237) by Zhilin Yang*, Zihang Dai*, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le.
|
||||
1. **[XLS-R](model_doc/xls_r)** (from Facebook AI) released with the paper [XLS-R: Self-supervised Cross-lingual Speech Representation Learning at Scale](https://huggingface.co/papers/2111.09296) by Arun Babu, Changhan Wang, Andros Tjandra, Kushal Lakhotia, Qiantong Xu, Naman Goyal, Kritika Singh, Patrick von Platen, Yatharth Saraf, Juan Pino, Alexei Baevski, Alexis Conneau, Michael Auli.
|
||||
1. **[XLSR-Wav2Vec2](model_doc/xlsr_wav2vec2)** (from Facebook AI) released with the paper [Unsupervised Cross-Lingual Representation Learning For Speech Recognition](https://huggingface.co/papers/2006.13979) by Alexis Conneau, Alexei Baevski, Ronan Collobert, Abdelrahman Mohamed, Michael Auli.
|
||||
1. **[YOLOS](model_doc/yolos)** (from Huazhong University of Science & Technology) released with the paper [You Only Look at One Sequence: Rethinking Transformer in Vision through Object Detection](https://huggingface.co/papers/2106.00666) by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu.
|
||||
1. **[YOSO](model_doc/yoso)** (from the University of Wisconsin - Madison) released with the paper [You Only Sample (Almost) Once: Linear Cost Self-Attention Via Bernoulli Sampling](https://huggingface.co/papers/2111.09714) by Zhanpeng Zeng, Yunyang Xiong, Sathya N. Ravi, Shailesh Acharya, Glenn Fung, Vikas Singh.
|
||||
1. **[UniSpeech](model_doc/unispeech)** (from Microsoft Research) released with the paper [UniSpeech: Unified Speech Representation Learning with Labeled and Unlabeled Data](https://arxiv.org/abs/2101.07597) by Chengyi Wang, Yu Wu, Yao Qian, Kenichi Kumatani, Shujie Liu, Furu Wei, Michael Zeng, Xuedong Huang.
|
||||
1. **[UniSpeechSat](model_doc/unispeech-sat)** (from Microsoft Research) released with the paper [UNISPEECH-SAT: UNIVERSAL SPEECH REPRESENTATION LEARNING WITH SPEAKER AWARE PRE-TRAINING](https://arxiv.org/abs/2110.05752) by Sanyuan Chen, Yu Wu, Chengyi Wang, Zhengyang Chen, Zhuo Chen, Shujie Liu, Jian Wu, Yao Qian, Furu Wei, Jinyu Li, Xiangzhan Yu.
|
||||
1. **[VAN](model_doc/van)** (from Tsinghua University and Nankai University) released with the paper [Visual Attention Network](https://arxiv.org/abs/2202.09741) by Meng-Hao Guo, Cheng-Ze Lu, Zheng-Ning Liu, Ming-Ming Cheng, Shi-Min Hu.
|
||||
1. **[VideoMAE](model_doc/videomae)** (from Multimedia Computing Group, Nanjing University) released with the paper [VideoMAE: Masked Autoencoders are Data-Efficient Learners for Self-Supervised Video Pre-Training](https://arxiv.org/abs/2203.12602) by Zhan Tong, Yibing Song, Jue Wang, Limin Wang.
|
||||
1. **[ViLT](model_doc/vilt)** (from NAVER AI Lab/Kakao Enterprise/Kakao Brain) released with the paper [ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision](https://arxiv.org/abs/2102.03334) by Wonjae Kim, Bokyung Son, Ildoo Kim.
|
||||
1. **[Vision Transformer (ViT)](model_doc/vit)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby.
|
||||
1. **[VisualBERT](model_doc/visual_bert)** (from UCLA NLP) released with the paper [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://arxiv.org/pdf/1908.03557) by Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang.
|
||||
1. **[ViTMAE](model_doc/vit_mae)** (from Meta AI) released with the paper [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) by Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick.
|
||||
1. **[Wav2Vec2](model_doc/wav2vec2)** (from Facebook AI) released with the paper [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli.
|
||||
1. **[Wav2Vec2-Conformer](model_doc/wav2vec2-conformer)** (from Facebook AI) released with the paper [FAIRSEQ S2T: Fast Speech-to-Text Modeling with FAIRSEQ](https://arxiv.org/abs/2010.05171) by Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Sravya Popuri, Dmytro Okhonko, Juan Pino.
|
||||
1. **[Wav2Vec2Phoneme](model_doc/wav2vec2_phoneme)** (from Facebook AI) released with the paper [Simple and Effective Zero-shot Cross-lingual Phoneme Recognition](https://arxiv.org/abs/2109.11680) by Qiantong Xu, Alexei Baevski, Michael Auli.
|
||||
1. **[WavLM](model_doc/wavlm)** (from Microsoft Research) released with the paper [WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing](https://arxiv.org/abs/2110.13900) by Sanyuan Chen, Chengyi Wang, Zhengyang Chen, Yu Wu, Shujie Liu, Zhuo Chen, Jinyu Li, Naoyuki Kanda, Takuya Yoshioka, Xiong Xiao, Jian Wu, Long Zhou, Shuo Ren, Yanmin Qian, Yao Qian, Jian Wu, Michael Zeng, Furu Wei.
|
||||
1. **[XGLM](model_doc/xglm)** (From Facebook AI) released with the paper [Few-shot Learning with Multilingual Language Models](https://arxiv.org/abs/2112.10668) by Xi Victoria Lin, Todor Mihaylov, Mikel Artetxe, Tianlu Wang, Shuohui Chen, Daniel Simig, Myle Ott, Naman Goyal, Shruti Bhosale, Jingfei Du, Ramakanth Pasunuru, Sam Shleifer, Punit Singh Koura, Vishrav Chaudhary, Brian O'Horo, Jeff Wang, Luke Zettlemoyer, Zornitsa Kozareva, Mona Diab, Veselin Stoyanov, Xian Li.
|
||||
1. **[XLM](model_doc/xlm)** (from Facebook) released together with the paper [Cross-lingual Language Model Pretraining](https://arxiv.org/abs/1901.07291) by Guillaume Lample and Alexis Conneau.
|
||||
1. **[XLM-ProphetNet](model_doc/xlm-prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
|
||||
1. **[XLM-RoBERTa](model_doc/xlm-roberta)** (from Facebook AI), released together with the paper [Unsupervised Cross-lingual Representation Learning at Scale](https://arxiv.org/abs/1911.02116) by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov.
|
||||
1. **[XLM-RoBERTa-XL](model_doc/xlm-roberta-xl)** (from Facebook AI), released together with the paper [Larger-Scale Transformers for Multilingual Masked Language Modeling](https://arxiv.org/abs/2105.00572) by Naman Goyal, Jingfei Du, Myle Ott, Giri Anantharaman, Alexis Conneau.
|
||||
1. **[XLM-V](model_doc/xlm-v)** (from Meta AI) released with the paper [XLM-V: Overcoming the Vocabulary Bottleneck in Multilingual Masked Language Models](https://arxiv.org/abs/2301.10472) by Davis Liang, Hila Gonen, Yuning Mao, Rui Hou, Naman Goyal, Marjan Ghazvininejad, Luke Zettlemoyer, Madian Khabsa.
|
||||
1. **[XLNet](model_doc/xlnet)** (from Google/CMU) released with the paper [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) by Zhilin Yang*, Zihang Dai*, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le.
|
||||
1. **[XLS-R](model_doc/xls_r)** (from Facebook AI) released with the paper [XLS-R: Self-supervised Cross-lingual Speech Representation Learning at Scale](https://arxiv.org/abs/2111.09296) by Arun Babu, Changhan Wang, Andros Tjandra, Kushal Lakhotia, Qiantong Xu, Naman Goyal, Kritika Singh, Patrick von Platen, Yatharth Saraf, Juan Pino, Alexei Baevski, Alexis Conneau, Michael Auli.
|
||||
1. **[XLSR-Wav2Vec2](model_doc/xlsr_wav2vec2)** (from Facebook AI) released with the paper [Unsupervised Cross-Lingual Representation Learning For Speech Recognition](https://arxiv.org/abs/2006.13979) by Alexis Conneau, Alexei Baevski, Ronan Collobert, Abdelrahman Mohamed, Michael Auli.
|
||||
1. **[YOLOS](model_doc/yolos)** (from Huazhong University of Science & Technology) released with the paper [You Only Look at One Sequence: Rethinking Transformer in Vision through Object Detection](https://arxiv.org/abs/2106.00666) by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu.
|
||||
1. **[YOSO](model_doc/yoso)** (from the University of Wisconsin - Madison) released with the paper [You Only Sample (Almost) Once: Linear Cost Self-Attention Via Bernoulli Sampling](https://arxiv.org/abs/2111.09714) by Zhanpeng Zeng, Yunyang Xiong, Sathya N. Ravi, Shailesh Acharya, Glenn Fung, Vikas Singh.
|
||||
|
||||
|
||||
### Unterstützte Frameworks
|
||||
|
@ -56,7 +56,7 @@ Dateien lassen sich auch in einem Repository leicht bearbeiten, und Sie können
|
||||
Bevor Sie ein Modell für den Hub freigeben, benötigen Sie Ihre Hugging Face-Anmeldedaten. Wenn Sie Zugang zu einem Terminal haben, führen Sie den folgenden Befehl in der virtuellen Umgebung aus, in der 🤗 Transformers installiert ist. Dadurch werden Ihre Zugangsdaten in Ihrem Hugging Face-Cache-Ordner (standardmäßig `~/.cache/`) gespeichert:
|
||||
|
||||
```bash
|
||||
hf auth login
|
||||
huggingface-cli login
|
||||
```
|
||||
|
||||
Wenn Sie ein Notebook wie Jupyter oder Colaboratory verwenden, stellen Sie sicher, dass Sie die [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library) Bibliothek installiert haben. Diese Bibliothek ermöglicht Ihnen die programmatische Interaktion mit dem Hub.
|
||||
|
@ -44,7 +44,7 @@ Transformers unterstützt nativ einige PEFT-Methoden, d.h. Sie können lokal ode
|
||||
|
||||
- [Low Rank Adapters](https://huggingface.co/docs/peft/conceptual_guides/lora)
|
||||
- [IA3](https://huggingface.co/docs/peft/conceptual_guides/ia3)
|
||||
- [AdaLoRA](https://huggingface.co/papers/2303.10512)
|
||||
- [AdaLoRA](https://arxiv.org/abs/2303.10512)
|
||||
|
||||
Wenn Sie andere PEFT-Methoden, wie z.B. Prompt Learning oder Prompt Tuning, verwenden möchten, oder über die 🤗 PEFT-Bibliothek im Allgemeinen, lesen Sie bitte die [Dokumentation](https://huggingface.co/docs/peft/index).
|
||||
|
||||
|
@ -324,7 +324,7 @@ python examples/pytorch/summarization/run_summarization.py
|
||||
Alle Skripte können Ihr endgültiges Modell in den [Model Hub](https://huggingface.co/models) hochladen. Stellen Sie sicher, dass Sie bei Hugging Face angemeldet sind, bevor Sie beginnen:
|
||||
|
||||
```bash
|
||||
hf auth login
|
||||
huggingface-cli login
|
||||
```
|
||||
|
||||
Dann fügen Sie dem Skript das Argument `push_to_hub` hinzu. Mit diesem Argument wird ein Repository mit Ihrem Hugging Face-Benutzernamen und dem in `output_dir` angegebenen Ordnernamen erstellt.
|
||||
|
@ -473,6 +473,13 @@ Hier ist zum Beispiel ein Test, der nur ausgeführt werden muss, wenn 2 oder meh
|
||||
def test_example_with_multi_gpu():
|
||||
```
|
||||
|
||||
Wenn ein Test `tensorflow` benötigt, verwenden Sie den Dekorator `require_tf`. Zum Beispiel:
|
||||
|
||||
```python no-style
|
||||
@require_tf
|
||||
def test_tf_thing_with_tensorflow():
|
||||
```
|
||||
|
||||
Diese Dekors können gestapelt werden. Wenn zum Beispiel ein Test langsam ist und mindestens eine GPU unter pytorch benötigt, können Sie
|
||||
wie Sie ihn einrichten können:
|
||||
|
||||
@ -1197,6 +1204,9 @@ if torch.cuda.is_available():
|
||||
import numpy as np
|
||||
|
||||
np.random.seed(seed)
|
||||
|
||||
# tf RNG
|
||||
tf.random.set_seed(seed)
|
||||
```
|
||||
|
||||
### Tests debuggen
|
||||
|
323
docs/source/de/transformers_agents.md
Normal file
323
docs/source/de/transformers_agents.md
Normal file
@ -0,0 +1,323 @@
|
||||
<!--Copyright 2023 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# Transformers Agents
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
Transformers Agents ist eine experimentelle API, die jederzeit geändert werden kann. Die von den Agenten zurückgegebenen Ergebnisse
|
||||
zurückgegeben werden, können variieren, da sich die APIs oder die zugrunde liegenden Modelle ändern können.
|
||||
|
||||
</Tip>
|
||||
|
||||
Transformers Version v4.29.0, die auf dem Konzept von *Tools* und *Agenten* aufbaut. Sie können damit spielen in
|
||||
[dieses Colab](https://colab.research.google.com/drive/1c7MHD-T1forUPGcC_jlwsIptOzpG3hSj).
|
||||
|
||||
Kurz gesagt, es bietet eine API für natürliche Sprache auf der Grundlage von Transformers: Wir definieren eine Reihe von kuratierten Tools und entwerfen einen
|
||||
Agenten, um natürliche Sprache zu interpretieren und diese Werkzeuge zu verwenden. Es ist von vornherein erweiterbar; wir haben einige relevante Tools kuratiert,
|
||||
aber wir werden Ihnen zeigen, wie das System einfach erweitert werden kann, um jedes von der Community entwickelte Tool zu verwenden.
|
||||
|
||||
Beginnen wir mit einigen Beispielen dafür, was mit dieser neuen API erreicht werden kann. Sie ist besonders leistungsfähig, wenn es um
|
||||
Sie ist besonders leistungsstark, wenn es um multimodale Aufgaben geht. Lassen Sie uns also eine Runde drehen, um Bilder zu erzeugen und Text vorzulesen.
|
||||
|
||||
```py
|
||||
agent.run("Caption the following image", image=image)
|
||||
```
|
||||
|
||||
| **Input** | **Output** |
|
||||
|-----------------------------------------------------------------------------------------------------------------------------|-----------------------------------|
|
||||
| <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/beaver.png" width=200> | A beaver is swimming in the water |
|
||||
|
||||
---
|
||||
|
||||
```py
|
||||
agent.run("Read the following text out loud", text=text)
|
||||
```
|
||||
| **Input** | **Output** |
|
||||
|-------------------------------------------------------------------------------------------------------------------------|----------------------------------------------|
|
||||
| A beaver is swimming in the water | <audio controls><source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tts_example.wav" type="audio/wav"> your browser does not support the audio element. </audio>
|
||||
|
||||
---
|
||||
|
||||
```py
|
||||
agent.run(
|
||||
"In the following `document`, where will the TRRF Scientific Advisory Council Meeting take place?",
|
||||
document=document,
|
||||
)
|
||||
```
|
||||
| **Input** | **Output** |
|
||||
|-----------------------------------------------------------------------------------------------------------------------------|----------------|
|
||||
| <img src="https://datasets-server.huggingface.co/assets/hf-internal-testing/example-documents/--/hf-internal-testing--example-documents/test/0/image/image.jpg" width=200> | ballroom foyer |
|
||||
|
||||
## Schnellstart
|
||||
|
||||
Bevor Sie `agent.run` verwenden können, müssen Sie einen Agenten instanziieren, der ein großes Sprachmodell (LLM) ist.
|
||||
Wir bieten Unterstützung für openAI-Modelle sowie für OpenSource-Alternativen von BigCode und OpenAssistant. Die openAI
|
||||
Modelle sind leistungsfähiger (erfordern aber einen openAI-API-Schlüssel, können also nicht kostenlos verwendet werden); Hugging Face
|
||||
bietet kostenlosen Zugang zu Endpunkten für BigCode- und OpenAssistant-Modelle.
|
||||
|
||||
To start with, please install the `agents` extras in order to install all default dependencies.
|
||||
```bash
|
||||
pip install transformers[agents]
|
||||
```
|
||||
|
||||
Um openAI-Modelle zu verwenden, instanziieren Sie einen [`OpenAiAgent`], nachdem Sie die `openai`-Abhängigkeit installiert haben:
|
||||
|
||||
```bash
|
||||
pip install openai
|
||||
```
|
||||
|
||||
|
||||
```py
|
||||
from transformers import OpenAiAgent
|
||||
|
||||
agent = OpenAiAgent(model="text-davinci-003", api_key="<your_api_key>")
|
||||
```
|
||||
|
||||
Um BigCode oder OpenAssistant zu verwenden, melden Sie sich zunächst an, um Zugriff auf die Inference API zu erhalten:
|
||||
|
||||
```py
|
||||
from huggingface_hub import login
|
||||
|
||||
login("<YOUR_TOKEN>")
|
||||
```
|
||||
|
||||
Dann instanziieren Sie den Agenten
|
||||
|
||||
```py
|
||||
from transformers import HfAgent
|
||||
|
||||
# Starcoder
|
||||
agent = HfAgent("https://api-inference.huggingface.co/models/bigcode/starcoder")
|
||||
# StarcoderBase
|
||||
# agent = HfAgent("https://api-inference.huggingface.co/models/bigcode/starcoderbase")
|
||||
# OpenAssistant
|
||||
# agent = HfAgent(url_endpoint="https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5")
|
||||
```
|
||||
|
||||
Dies geschieht mit der Inferenz-API, die Hugging Face derzeit kostenlos zur Verfügung stellt. Wenn Sie Ihren eigenen Inferenz
|
||||
Endpunkt für dieses Modell (oder einen anderen) haben, können Sie die obige URL durch Ihren URL-Endpunkt ersetzen.
|
||||
|
||||
<Tip>
|
||||
|
||||
StarCoder und OpenAssistant sind kostenlos und leisten bei einfachen Aufgaben bewundernswert gute Arbeit. Allerdings halten die Kontrollpunkte
|
||||
nicht, wenn es um komplexere Aufforderungen geht. Wenn Sie mit einem solchen Problem konfrontiert sind, empfehlen wir Ihnen, das OpenAI
|
||||
Modell auszuprobieren, das zwar leider nicht quelloffen ist, aber zur Zeit eine bessere Leistung erbringt.
|
||||
|
||||
</Tip>
|
||||
|
||||
Sie sind jetzt startklar! Lassen Sie uns in die beiden APIs eintauchen, die Ihnen jetzt zur Verfügung stehen.
|
||||
|
||||
### Einzelne Ausführung (run)
|
||||
|
||||
Die Methode der einmaligen Ausführung ist die Verwendung der [`~Agent.run`] Methode des Agenten:
|
||||
|
||||
```py
|
||||
agent.run("Draw me a picture of rivers and lakes.")
|
||||
```
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png" width=200>
|
||||
|
||||
Es wählt automatisch das (oder die) Werkzeug(e) aus, das (die) für die von Ihnen gewünschte Aufgabe geeignet ist (sind) und führt es (sie) entsprechend aus. Es
|
||||
kann eine oder mehrere Aufgaben in der gleichen Anweisung ausführen (je komplexer Ihre Anweisung ist, desto wahrscheinlicher ist ein
|
||||
der Agent scheitern).
|
||||
|
||||
```py
|
||||
agent.run("Draw me a picture of the sea then transform the picture to add an island")
|
||||
```
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/sea_and_island.png" width=200>
|
||||
|
||||
<br/>
|
||||
|
||||
|
||||
Jede [`~Agent.run`] Operation ist unabhängig, so dass Sie sie mehrmals hintereinander mit unterschiedlichen Aufgaben ausführen können.
|
||||
|
||||
Beachten Sie, dass Ihr `Agent` nur ein großsprachiges Modell ist, so dass kleine Variationen in Ihrer Eingabeaufforderung völlig unterschiedliche Ergebnisse liefern können.
|
||||
unterschiedliche Ergebnisse liefern. Es ist wichtig, dass Sie die Aufgabe, die Sie ausführen möchten, so genau wie möglich erklären. Wir gehen noch weiter ins Detail
|
||||
wie man gute Prompts schreibt [hier](custom_tools#writing-good-user-inputs).
|
||||
|
||||
Wenn Sie einen Status über Ausführungszeiten hinweg beibehalten oder dem Agenten Nicht-Text-Objekte übergeben möchten, können Sie dies tun, indem Sie
|
||||
Variablen, die der Agent verwenden soll. Sie könnten zum Beispiel das erste Bild von Flüssen und Seen erzeugen,
|
||||
und das Modell bitten, dieses Bild zu aktualisieren und eine Insel hinzuzufügen, indem Sie Folgendes tun:
|
||||
|
||||
```python
|
||||
picture = agent.run("Generate a picture of rivers and lakes.")
|
||||
updated_picture = agent.run("Transform the image in `picture` to add an island to it.", picture=picture)
|
||||
```
|
||||
|
||||
<Tip>
|
||||
|
||||
Dies kann hilfreich sein, wenn das Modell Ihre Anfrage nicht verstehen kann und die Werkzeuge verwechselt. Ein Beispiel wäre:
|
||||
|
||||
```py
|
||||
agent.run("Draw me the picture of a capybara swimming in the sea")
|
||||
```
|
||||
|
||||
Hier könnte das Modell auf zwei Arten interpretieren:
|
||||
- Die Funktion `Text-zu-Bild` erzeugt ein Wasserschwein, das im Meer schwimmt.
|
||||
- Oder Sie lassen das `Text-zu-Bild` ein Wasserschwein erzeugen und verwenden dann das Werkzeug `Bildtransformation`, um es im Meer schwimmen zu lassen.
|
||||
|
||||
Falls Sie das erste Szenario erzwingen möchten, können Sie dies tun, indem Sie die Eingabeaufforderung als Argument übergeben:
|
||||
|
||||
```py
|
||||
agent.run("Draw me a picture of the `prompt`", prompt="a capybara swimming in the sea")
|
||||
```
|
||||
|
||||
</Tip>
|
||||
|
||||
|
||||
### Chat-basierte Ausführung (Chat)
|
||||
|
||||
Der Agent verfügt auch über einen Chat-basierten Ansatz, der die Methode [`~Agent.chat`] verwendet:
|
||||
|
||||
```py
|
||||
agent.chat("Generate a picture of rivers and lakes")
|
||||
```
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png" width=200>
|
||||
|
||||
```py
|
||||
agent.chat("Transform the picture so that there is a rock in there")
|
||||
```
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes_and_beaver.png" width=200>
|
||||
|
||||
<br/>
|
||||
|
||||
Dies ist ein interessanter Ansatz, wenn Sie den Zustand über Anweisungen hinweg beibehalten möchten. Er ist besser für Experimente geeignet,
|
||||
eignet sich aber eher für einzelne Anweisungen als für komplexe Anweisungen (die die [`~Agent.run`]
|
||||
Methode besser verarbeiten kann).
|
||||
|
||||
Diese Methode kann auch Argumente entgegennehmen, wenn Sie Nicht-Text-Typen oder bestimmte Aufforderungen übergeben möchten.
|
||||
|
||||
### ⚠️ Fernausführung
|
||||
|
||||
Zu Demonstrationszwecken und damit es mit allen Setups verwendet werden kann, haben wir Remote-Executors für mehrere
|
||||
der Standard-Tools erstellt, auf die der Agent in dieser Version Zugriff hat. Diese werden erstellt mit
|
||||
[inference endpoints](https://huggingface.co/inference-endpoints).
|
||||
|
||||
Wir haben diese vorerst deaktiviert, aber um zu sehen, wie Sie selbst Remote Executors Tools einrichten können,
|
||||
empfehlen wir die Lektüre des [custom tool guide](./custom_tools).
|
||||
|
||||
### Was passiert hier? Was sind Tools und was sind Agenten?
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/diagram.png">
|
||||
|
||||
#### Agenten
|
||||
|
||||
Der "Agent" ist hier ein großes Sprachmodell, das wir auffordern, Zugang zu einem bestimmten Satz von Tools zu erhalten.
|
||||
|
||||
LLMs sind ziemlich gut darin, kleine Codeproben zu erzeugen. Diese API macht sich das zunutze, indem sie das
|
||||
LLM ein kleines Codebeispiel gibt, das eine Aufgabe mit einer Reihe von Werkzeugen ausführt. Diese Aufforderung wird dann ergänzt durch die
|
||||
Aufgabe, die Sie Ihrem Agenten geben, und die Beschreibung der Werkzeuge, die Sie ihm geben. Auf diese Weise erhält er Zugriff auf die Dokumentation der
|
||||
Tools, insbesondere die erwarteten Eingaben und Ausgaben, und kann den entsprechenden Code generieren.
|
||||
|
||||
#### Tools
|
||||
|
||||
Tools sind sehr einfach: Sie bestehen aus einer einzigen Funktion mit einem Namen und einer Beschreibung. Wir verwenden dann die Beschreibungen dieser Tools
|
||||
um den Agenten aufzufordern. Anhand der Eingabeaufforderung zeigen wir dem Agenten, wie er die Tools nutzen kann, um das zu tun, was in der
|
||||
in der Abfrage angefordert wurde.
|
||||
|
||||
Dies geschieht mit brandneuen Tools und nicht mit Pipelines, denn der Agent schreibt besseren Code mit sehr atomaren Tools.
|
||||
Pipelines sind stärker refaktorisiert und fassen oft mehrere Aufgaben in einer einzigen zusammen. Tools sind dafür gedacht, sich auf
|
||||
eine einzige, sehr einfache Aufgabe konzentrieren.
|
||||
|
||||
#### Code-Ausführung?!
|
||||
|
||||
Dieser Code wird dann mit unserem kleinen Python-Interpreter auf den mit Ihren Tools übergebenen Eingaben ausgeführt.
|
||||
Wir hören Sie schon schreien "Willkürliche Codeausführung!", aber lassen Sie uns erklären, warum das nicht der Fall ist.
|
||||
|
||||
Die einzigen Funktionen, die aufgerufen werden können, sind die von Ihnen zur Verfügung gestellten Tools und die Druckfunktion, so dass Sie bereits eingeschränkt sind
|
||||
eingeschränkt, was ausgeführt werden kann. Sie sollten sicher sein, wenn es sich auf die Werkzeuge für das Umarmungsgesicht beschränkt.
|
||||
|
||||
Dann lassen wir keine Attributsuche oder Importe zu (die ohnehin nicht benötigt werden, um die
|
||||
Inputs/Outputs an eine kleine Gruppe von Funktionen), so dass alle offensichtlichen Angriffe (und Sie müssten den LLM
|
||||
dazu auffordern, sie auszugeben) kein Problem darstellen sollten. Wenn Sie auf Nummer sicher gehen wollen, können Sie die
|
||||
run()-Methode mit dem zusätzlichen Argument return_code=True ausführen. In diesem Fall gibt der Agent nur den auszuführenden Code
|
||||
zur Ausführung zurück und Sie können entscheiden, ob Sie ihn ausführen möchten oder nicht.
|
||||
|
||||
Die Ausführung bricht bei jeder Zeile ab, in der versucht wird, eine illegale Operation auszuführen, oder wenn ein regulärer Python-Fehler
|
||||
mit dem vom Agenten generierten Code.
|
||||
|
||||
### Ein kuratierter Satz von Tools
|
||||
|
||||
Wir haben eine Reihe von Tools identifiziert, die solche Agenten unterstützen können. Hier ist eine aktualisierte Liste der Tools, die wir integriert haben
|
||||
in `transformers` integriert haben:
|
||||
|
||||
- **Beantwortung von Fragen zu Dokumenten**: Beantworten Sie anhand eines Dokuments (z.B. PDF) im Bildformat eine Frage zu diesem Dokument ([Donut](./model_doc/donut))
|
||||
- Beantworten von Textfragen**: Geben Sie einen langen Text und eine Frage an, beantworten Sie die Frage im Text ([Flan-T5](./model_doc/flan-t5))
|
||||
- **Unbedingte Bildunterschriften**: Beschriften Sie das Bild! ([BLIP](./model_doc/blip))
|
||||
- **Bildfragebeantwortung**: Beantworten Sie bei einem Bild eine Frage zu diesem Bild ([VILT](./model_doc/vilt))
|
||||
- **Bildsegmentierung**: Geben Sie ein Bild und einen Prompt an und geben Sie die Segmentierungsmaske dieses Prompts aus ([CLIPSeg](./model_doc/clipseg))
|
||||
- **Sprache in Text**: Geben Sie eine Audioaufnahme einer sprechenden Person an und transkribieren Sie die Sprache in Text ([Whisper](./model_doc/whisper))
|
||||
- **Text in Sprache**: wandelt Text in Sprache um ([SpeechT5](./model_doc/speecht5))
|
||||
- **Zero-Shot-Textklassifizierung**: Ermitteln Sie anhand eines Textes und einer Liste von Bezeichnungen, welcher Bezeichnung der Text am ehesten entspricht ([BART](./model_doc/bart))
|
||||
- **Textzusammenfassung**: fassen Sie einen langen Text in einem oder wenigen Sätzen zusammen ([BART](./model_doc/bart))
|
||||
- **Übersetzung**: Übersetzen des Textes in eine bestimmte Sprache ([NLLB](./model_doc/nllb))
|
||||
|
||||
Diese Tools sind in Transformatoren integriert und können auch manuell verwendet werden, zum Beispiel:
|
||||
|
||||
```py
|
||||
from transformers import load_tool
|
||||
|
||||
tool = load_tool("text-to-speech")
|
||||
audio = tool("This is a text to speech tool")
|
||||
```
|
||||
|
||||
### Benutzerdefinierte Tools
|
||||
|
||||
Wir haben zwar eine Reihe von Tools identifiziert, sind aber der festen Überzeugung, dass der Hauptwert dieser Implementierung darin besteht
|
||||
die Möglichkeit, benutzerdefinierte Tools schnell zu erstellen und weiterzugeben.
|
||||
|
||||
Indem Sie den Code eines Tools in einen Hugging Face Space oder ein Modell-Repository stellen, können Sie das Tool
|
||||
direkt mit dem Agenten nutzen. Wir haben ein paar neue Funktionen hinzugefügt
|
||||
**transformers-agnostic** Tools zur [`huggingface-tools` Organisation](https://huggingface.co/huggingface-tools) hinzugefügt:
|
||||
|
||||
- **Text-Downloader**: zum Herunterladen eines Textes von einer Web-URL
|
||||
- **Text zu Bild**: erzeugt ein Bild nach einer Eingabeaufforderung und nutzt dabei stabile Diffusion
|
||||
- **Bildtransformation**: verändert ein Bild anhand eines Ausgangsbildes und einer Eingabeaufforderung, unter Ausnutzung der stabilen pix2pix-Diffusion
|
||||
- **Text zu Video**: Erzeugen eines kleinen Videos nach einer Eingabeaufforderung, unter Verwendung von damo-vilab
|
||||
|
||||
Das Text-zu-Bild-Tool, das wir von Anfang an verwendet haben, ist ein Remote-Tool, das sich in
|
||||
[*huggingface-tools/text-to-image*](https://huggingface.co/spaces/huggingface-tools/text-to-image)! Wir werden
|
||||
weiterhin solche Tools für diese und andere Organisationen veröffentlichen, um diese Implementierung weiter zu verbessern.
|
||||
|
||||
Die Agenten haben standardmäßig Zugriff auf die Tools, die sich auf [*huggingface-tools*](https://huggingface.co/huggingface-tools) befinden.
|
||||
Wie Sie Ihre eigenen Tools schreiben und freigeben können und wie Sie jedes benutzerdefinierte Tool, das sich auf dem Hub befindet, nutzen können, erklären wir in [folgender Anleitung](custom_tools).
|
||||
|
||||
### Code-Erzeugung
|
||||
|
||||
Bisher haben wir gezeigt, wie Sie die Agenten nutzen können, um Aktionen für Sie durchzuführen. Der Agent generiert jedoch nur Code
|
||||
den wir dann mit einem sehr eingeschränkten Python-Interpreter ausführen. Falls Sie den generierten Code in einer anderen Umgebung verwenden möchten
|
||||
einer anderen Umgebung verwenden möchten, können Sie den Agenten auffordern, den Code zusammen mit einer Tooldefinition und genauen Importen zurückzugeben.
|
||||
|
||||
Zum Beispiel die folgende Anweisung
|
||||
```python
|
||||
agent.run("Draw me a picture of rivers and lakes", return_code=True)
|
||||
```
|
||||
|
||||
gibt den folgenden Code zurück
|
||||
|
||||
```python
|
||||
from transformers import load_tool
|
||||
|
||||
image_generator = load_tool("huggingface-tools/text-to-image")
|
||||
|
||||
image = image_generator(prompt="rivers and lakes")
|
||||
```
|
||||
|
||||
die Sie dann selbst ändern und ausführen können.
|
@ -17,12 +17,18 @@
|
||||
title: Customizing model components
|
||||
- local: model_sharing
|
||||
title: Sharing
|
||||
- local: modular_transformers
|
||||
title: Contributing a new model to Transformers
|
||||
- local: add_new_model
|
||||
title: Legacy model contribution
|
||||
- local: auto_docstring
|
||||
title: Documenting a model
|
||||
title: Adding a new model to Transformers
|
||||
- local: modular_transformers
|
||||
title: Modular Transformers
|
||||
- local: task_summary
|
||||
title: What 🤗 Transformers can do
|
||||
- local: tasks_explained
|
||||
title: How 🤗 Transformers solve tasks
|
||||
- local: model_summary
|
||||
title: The Transformer model family
|
||||
- local: attention
|
||||
title: Attention mechanisms
|
||||
- local: attention_interface
|
||||
title: Customizing attention function
|
||||
title: Models
|
||||
@ -31,8 +37,6 @@
|
||||
title: Tokenizers
|
||||
- local: image_processors
|
||||
title: Image processors
|
||||
- local: video_processors
|
||||
title: Video processors
|
||||
- local: backbones
|
||||
title: Backbones
|
||||
- local: feature_extractors
|
||||
@ -68,10 +72,12 @@
|
||||
title: Prompt engineering
|
||||
- local: llm_optims
|
||||
title: Optimizing inference
|
||||
- local: cache_explanation
|
||||
title: Caching
|
||||
- local: kv_cache
|
||||
title: KV cache strategies
|
||||
- local: serving
|
||||
title: Serving
|
||||
- local: cache_explanation
|
||||
title: Caching
|
||||
- local: llm_tutorial_optimization
|
||||
title: Getting the most out of LLMs
|
||||
- local: perplexity
|
||||
@ -95,18 +101,16 @@
|
||||
- local: perf_infer_gpu_one
|
||||
title: GPU
|
||||
- local: perf_infer_gpu_multi
|
||||
title: Distributed inference
|
||||
title: Distributed GPU inference
|
||||
- local: perf_infer_cpu
|
||||
title: CPU
|
||||
- local: tf_xla
|
||||
title: XLA
|
||||
title: Optimization
|
||||
- local: agents
|
||||
title: Agents
|
||||
- local: tools
|
||||
title: Tools
|
||||
- local: serving
|
||||
title: Serving
|
||||
- local: transformers_as_backend
|
||||
title: Inference server backends
|
||||
title: Inference
|
||||
- isExpanded: false
|
||||
sections:
|
||||
@ -121,8 +125,8 @@
|
||||
title: Hyperparameter search
|
||||
title: Trainer API
|
||||
- sections:
|
||||
- local: accelerator_selection
|
||||
title: Accelerator selection
|
||||
- local: gpu_selection
|
||||
title: GPU selection
|
||||
- local: accelerate
|
||||
title: Accelerate
|
||||
- local: fsdp
|
||||
@ -141,10 +145,10 @@
|
||||
title: GPU
|
||||
- local: perf_train_cpu
|
||||
title: CPU
|
||||
- local: perf_train_tpu_tf
|
||||
title: TPU
|
||||
- local: perf_train_special
|
||||
title: Apple Silicon
|
||||
- local: perf_train_gaudi
|
||||
title: Intel Gaudi
|
||||
- local: perf_hardware
|
||||
title: Build your own machine
|
||||
title: Hardware
|
||||
@ -157,14 +161,8 @@
|
||||
sections:
|
||||
- local: quantization/overview
|
||||
title: Overview
|
||||
- local: quantization/selecting
|
||||
title: Selecting a quantization method
|
||||
- local: quantization/concept_guide
|
||||
title: Quantization concepts
|
||||
- local: quantization/aqlm
|
||||
title: AQLM
|
||||
- local: quantization/auto_round
|
||||
title: AutoRound
|
||||
- local: quantization/awq
|
||||
title: AWQ
|
||||
- local: quantization/bitnet
|
||||
@ -179,8 +177,6 @@
|
||||
title: FBGEMM
|
||||
- local: quantization/finegrained_fp8
|
||||
title: Fine-grained FP8
|
||||
- local: quantization/fp_quant
|
||||
title: FP-Quant
|
||||
- local: gguf
|
||||
title: GGUF
|
||||
- local: quantization/gptq
|
||||
@ -283,8 +279,6 @@
|
||||
title: Image-text-to-text
|
||||
- local: tasks/video_text_to_text
|
||||
title: Video-text-to-text
|
||||
- local: tasks/visual_document_retrieval
|
||||
title: Visual Document Retrieval
|
||||
title: Multimodal
|
||||
title: Task recipes
|
||||
- local: run_scripts
|
||||
@ -312,6 +306,8 @@
|
||||
- isExpanded: false
|
||||
sections:
|
||||
- sections:
|
||||
- local: main_classes/agent
|
||||
title: Agents and Tools
|
||||
- local: model_doc/auto
|
||||
title: Auto Classes
|
||||
- local: main_classes/backbones
|
||||
@ -356,15 +352,11 @@
|
||||
title: Feature Extractor
|
||||
- local: main_classes/image_processor
|
||||
title: Image Processor
|
||||
- local: main_classes/video_processor
|
||||
title: Video Processor
|
||||
title: Main Classes
|
||||
title: Main classes
|
||||
- sections:
|
||||
- sections:
|
||||
- local: model_doc/albert
|
||||
title: ALBERT
|
||||
- local: model_doc/arcee
|
||||
title: Arcee
|
||||
- local: model_doc/bamba
|
||||
title: Bamba
|
||||
- local: model_doc/bart
|
||||
@ -380,15 +372,13 @@
|
||||
- local: model_doc/bert-japanese
|
||||
title: BertJapanese
|
||||
- local: model_doc/bertweet
|
||||
title: BERTweet
|
||||
title: Bertweet
|
||||
- local: model_doc/big_bird
|
||||
title: BigBird
|
||||
- local: model_doc/bigbird_pegasus
|
||||
title: BigBirdPegasus
|
||||
- local: model_doc/biogpt
|
||||
title: BioGpt
|
||||
- local: model_doc/bitnet
|
||||
title: BitNet
|
||||
- local: model_doc/blenderbot
|
||||
title: Blenderbot
|
||||
- local: model_doc/blenderbot-small
|
||||
@ -425,18 +415,12 @@
|
||||
title: DeBERTa
|
||||
- local: model_doc/deberta-v2
|
||||
title: DeBERTa-v2
|
||||
- local: model_doc/deepseek_v3
|
||||
title: DeepSeek-V3
|
||||
- local: model_doc/dialogpt
|
||||
title: DialoGPT
|
||||
- local: model_doc/diffllama
|
||||
title: DiffLlama
|
||||
- local: model_doc/distilbert
|
||||
title: DistilBERT
|
||||
- local: model_doc/doge
|
||||
title: Doge
|
||||
- local: model_doc/dots1
|
||||
title: dots1
|
||||
- local: model_doc/dpr
|
||||
title: DPR
|
||||
- local: model_doc/electra
|
||||
@ -445,22 +429,14 @@
|
||||
title: Encoder Decoder Models
|
||||
- local: model_doc/ernie
|
||||
title: ERNIE
|
||||
- local: model_doc/ernie4_5
|
||||
title: Ernie4_5
|
||||
- local: model_doc/ernie4_5_moe
|
||||
title: Ernie4_5_MoE
|
||||
- local: model_doc/ernie_m
|
||||
title: ErnieM
|
||||
- local: model_doc/esm
|
||||
title: ESM
|
||||
- local: model_doc/exaone4
|
||||
title: EXAONE-4.0
|
||||
- local: model_doc/falcon
|
||||
title: Falcon
|
||||
- local: model_doc/falcon3
|
||||
title: Falcon3
|
||||
- local: model_doc/falcon_h1
|
||||
title: FalconH1
|
||||
- local: model_doc/falcon_mamba
|
||||
title: FalconMamba
|
||||
- local: model_doc/flan-t5
|
||||
@ -483,10 +459,6 @@
|
||||
title: Gemma2
|
||||
- local: model_doc/glm
|
||||
title: GLM
|
||||
- local: model_doc/glm4
|
||||
title: glm4
|
||||
- local: model_doc/glm4_moe
|
||||
title: glm4_moe
|
||||
- local: model_doc/openai-gpt
|
||||
title: GPT
|
||||
- local: model_doc/gpt_neo
|
||||
@ -509,16 +481,14 @@
|
||||
title: Granite
|
||||
- local: model_doc/granitemoe
|
||||
title: GraniteMoe
|
||||
- local: model_doc/granitemoehybrid
|
||||
title: GraniteMoeHybrid
|
||||
- local: model_doc/granitemoeshared
|
||||
title: GraniteMoeShared
|
||||
- local: model_doc/granitevision
|
||||
title: GraniteVision
|
||||
- local: model_doc/helium
|
||||
title: Helium
|
||||
- local: model_doc/herbert
|
||||
title: HerBERT
|
||||
- local: model_doc/hgnet_v2
|
||||
title: HGNet-V2
|
||||
- local: model_doc/ibert
|
||||
title: I-BERT
|
||||
- local: model_doc/jamba
|
||||
@ -529,8 +499,6 @@
|
||||
title: Jukebox
|
||||
- local: model_doc/led
|
||||
title: LED
|
||||
- local: model_doc/lfm2
|
||||
title: LFM2
|
||||
- local: model_doc/llama
|
||||
title: LLaMA
|
||||
- local: model_doc/llama2
|
||||
@ -550,7 +518,7 @@
|
||||
- local: model_doc/mamba
|
||||
title: Mamba
|
||||
- local: model_doc/mamba2
|
||||
title: Mamba2
|
||||
title: mamba2
|
||||
- local: model_doc/marian
|
||||
title: MarianMT
|
||||
- local: model_doc/markuplm
|
||||
@ -563,10 +531,10 @@
|
||||
title: MegatronBERT
|
||||
- local: model_doc/megatron_gpt2
|
||||
title: MegatronGPT2
|
||||
- local: model_doc/minimax
|
||||
title: MiniMax
|
||||
- local: model_doc/mistral
|
||||
title: Mistral
|
||||
- local: model_doc/mistral3
|
||||
title: Mistral3
|
||||
- local: model_doc/mixtral
|
||||
title: Mixtral
|
||||
- local: model_doc/mluke
|
||||
@ -575,8 +543,6 @@
|
||||
title: MobileBERT
|
||||
- local: model_doc/modernbert
|
||||
title: ModernBert
|
||||
- local: model_doc/modernbert-decoder
|
||||
title: ModernBERTDecoder
|
||||
- local: model_doc/mpnet
|
||||
title: MPNet
|
||||
- local: model_doc/mpt
|
||||
@ -619,6 +585,8 @@
|
||||
title: Phi
|
||||
- local: model_doc/phi3
|
||||
title: Phi-3
|
||||
- local: model_doc/phi4_multimodal
|
||||
title: Phi4 Multimodal
|
||||
- local: model_doc/phimoe
|
||||
title: PhiMoE
|
||||
- local: model_doc/phobert
|
||||
@ -633,10 +601,6 @@
|
||||
title: Qwen2
|
||||
- local: model_doc/qwen2_moe
|
||||
title: Qwen2MoE
|
||||
- local: model_doc/qwen3
|
||||
title: Qwen3
|
||||
- local: model_doc/qwen3_moe
|
||||
title: Qwen3MoE
|
||||
- local: model_doc/rag
|
||||
title: RAG
|
||||
- local: model_doc/realm
|
||||
@ -671,8 +635,6 @@
|
||||
title: SwitchTransformers
|
||||
- local: model_doc/t5
|
||||
title: T5
|
||||
- local: model_doc/t5gemma
|
||||
title: T5Gemma
|
||||
- local: model_doc/t5v1.1
|
||||
title: T5v1.1
|
||||
- local: model_doc/tapex
|
||||
@ -699,8 +661,6 @@
|
||||
title: XLM-V
|
||||
- local: model_doc/xlnet
|
||||
title: XLNet
|
||||
- local: model_doc/xlstm
|
||||
title: xLSTM
|
||||
- local: model_doc/yoso
|
||||
title: YOSO
|
||||
- local: model_doc/zamba
|
||||
@ -709,8 +669,6 @@
|
||||
title: Zamba2
|
||||
title: Text models
|
||||
- sections:
|
||||
- local: model_doc/aimv2
|
||||
title: Aimv2
|
||||
- local: model_doc/beit
|
||||
title: BEiT
|
||||
- local: model_doc/bit
|
||||
@ -723,16 +681,8 @@
|
||||
title: ConvNeXTV2
|
||||
- local: model_doc/cvt
|
||||
title: CvT
|
||||
- local: model_doc/d_fine
|
||||
title: D-FINE
|
||||
- local: model_doc/dab-detr
|
||||
title: DAB-DETR
|
||||
- local: model_doc/deepseek_v2
|
||||
title: DeepSeek-V2
|
||||
- local: model_doc/deepseek_vl
|
||||
title: DeepseekVL
|
||||
- local: model_doc/deepseek_vl_hybrid
|
||||
title: DeepseekVLHybrid
|
||||
- local: model_doc/deformable_detr
|
||||
title: Deformable DETR
|
||||
- local: model_doc/deit
|
||||
@ -759,12 +709,8 @@
|
||||
title: DPT
|
||||
- local: model_doc/efficientformer
|
||||
title: EfficientFormer
|
||||
- local: model_doc/efficientloftr
|
||||
title: EfficientLoFTR
|
||||
- local: model_doc/efficientnet
|
||||
title: EfficientNet
|
||||
- local: model_doc/eomt
|
||||
title: EoMT
|
||||
- local: model_doc/focalnet
|
||||
title: FocalNet
|
||||
- local: model_doc/glpn
|
||||
@ -777,14 +723,10 @@
|
||||
title: ImageGPT
|
||||
- local: model_doc/levit
|
||||
title: LeViT
|
||||
- local: model_doc/lightglue
|
||||
title: LightGlue
|
||||
- local: model_doc/mask2former
|
||||
title: Mask2Former
|
||||
- local: model_doc/maskformer
|
||||
title: MaskFormer
|
||||
- local: model_doc/mlcd
|
||||
title: MLCD
|
||||
- local: model_doc/mobilenet_v1
|
||||
title: MobileNetV1
|
||||
- local: model_doc/mobilenet_v2
|
||||
@ -863,22 +805,14 @@
|
||||
title: Bark
|
||||
- local: model_doc/clap
|
||||
title: CLAP
|
||||
- local: model_doc/csm
|
||||
title: CSM
|
||||
- local: model_doc/dac
|
||||
title: dac
|
||||
- local: model_doc/dia
|
||||
title: Dia
|
||||
- local: model_doc/encodec
|
||||
title: EnCodec
|
||||
- local: model_doc/fastspeech2_conformer
|
||||
title: FastSpeech2Conformer
|
||||
- local: model_doc/granite_speech
|
||||
title: GraniteSpeech
|
||||
- local: model_doc/hubert
|
||||
title: Hubert
|
||||
- local: model_doc/kyutai_speech_to_text
|
||||
title: Kyutai Speech-To-Text
|
||||
- local: model_doc/mctct
|
||||
title: MCTCT
|
||||
- local: model_doc/mimi
|
||||
@ -937,8 +871,6 @@
|
||||
- sections:
|
||||
- local: model_doc/timesformer
|
||||
title: TimeSformer
|
||||
- local: model_doc/vjepa2
|
||||
title: V-JEPA 2
|
||||
- local: model_doc/videomae
|
||||
title: VideoMAE
|
||||
- local: model_doc/vivit
|
||||
@ -973,8 +905,6 @@
|
||||
title: CLVP
|
||||
- local: model_doc/colpali
|
||||
title: ColPali
|
||||
- local: model_doc/colqwen2
|
||||
title: ColQwen2
|
||||
- local: model_doc/data2vec
|
||||
title: Data2Vec
|
||||
- local: model_doc/deplot
|
||||
@ -983,22 +913,14 @@
|
||||
title: Donut
|
||||
- local: model_doc/emu3
|
||||
title: Emu3
|
||||
- local: model_doc/evolla
|
||||
title: Evolla
|
||||
- local: model_doc/flava
|
||||
title: FLAVA
|
||||
- local: model_doc/gemma3
|
||||
title: Gemma3
|
||||
- local: model_doc/gemma3n
|
||||
title: Gemma3n
|
||||
- local: model_doc/git
|
||||
title: GIT
|
||||
- local: model_doc/glm4v
|
||||
title: glm4v
|
||||
- local: model_doc/got_ocr2
|
||||
title: GOT-OCR2
|
||||
- local: model_doc/granitevision
|
||||
title: GraniteVision
|
||||
- local: model_doc/grounding-dino
|
||||
title: Grounding DINO
|
||||
- local: model_doc/groupvit
|
||||
@ -1013,10 +935,6 @@
|
||||
title: InstructBLIP
|
||||
- local: model_doc/instructblipvideo
|
||||
title: InstructBlipVideo
|
||||
- local: model_doc/internvl
|
||||
title: InternVL
|
||||
- local: model_doc/janus
|
||||
title: Janus
|
||||
- local: model_doc/kosmos-2
|
||||
title: KOSMOS-2
|
||||
- local: model_doc/layoutlm
|
||||
@ -1029,8 +947,6 @@
|
||||
title: LayoutXLM
|
||||
- local: model_doc/lilt
|
||||
title: LiLT
|
||||
- local: model_doc/llama4
|
||||
title: Llama4
|
||||
- local: model_doc/llava
|
||||
title: Llava
|
||||
- local: model_doc/llava_next
|
||||
@ -1045,8 +961,6 @@
|
||||
title: MatCha
|
||||
- local: model_doc/mgp-str
|
||||
title: MGP-STR
|
||||
- local: model_doc/mistral3
|
||||
title: Mistral3
|
||||
- local: model_doc/mllama
|
||||
title: mllama
|
||||
- local: model_doc/nougat
|
||||
@ -1063,16 +977,10 @@
|
||||
title: PaliGemma
|
||||
- local: model_doc/perceiver
|
||||
title: Perceiver
|
||||
- local: model_doc/perception_lm
|
||||
title: PerceptionLM
|
||||
- local: model_doc/phi4_multimodal
|
||||
title: Phi4 Multimodal
|
||||
- local: model_doc/pix2struct
|
||||
title: Pix2Struct
|
||||
- local: model_doc/pixtral
|
||||
title: Pixtral
|
||||
- local: model_doc/qwen2_5_omni
|
||||
title: Qwen2.5-Omni
|
||||
- local: model_doc/qwen2_5_vl
|
||||
title: Qwen2.5-VL
|
||||
- local: model_doc/qwen2_audio
|
||||
@ -1081,16 +989,12 @@
|
||||
title: Qwen2VL
|
||||
- local: model_doc/sam
|
||||
title: Segment Anything
|
||||
- local: model_doc/sam_hq
|
||||
title: Segment Anything High Quality
|
||||
- local: model_doc/shieldgemma2
|
||||
title: ShieldGemma2
|
||||
- local: model_doc/siglip
|
||||
title: SigLIP
|
||||
- local: model_doc/siglip2
|
||||
title: SigLIP2
|
||||
- local: model_doc/smollm3
|
||||
title: SmolLM3
|
||||
- local: model_doc/smolvlm
|
||||
title: SmolVLM
|
||||
- local: model_doc/speech-encoder-decoder
|
||||
@ -1117,8 +1021,6 @@
|
||||
title: Vision Text Dual Encoder
|
||||
- local: model_doc/visual_bert
|
||||
title: VisualBERT
|
||||
- local: model_doc/voxtral
|
||||
title: Voxtral
|
||||
- local: model_doc/xclip
|
||||
title: X-CLIP
|
||||
title: Multimodal models
|
||||
@ -1139,8 +1041,6 @@
|
||||
title: PatchTST
|
||||
- local: model_doc/time_series_transformer
|
||||
title: Time Series Transformer
|
||||
- local: model_doc/timesfm
|
||||
title: TimesFM
|
||||
title: Time series models
|
||||
- sections:
|
||||
- local: model_doc/graphormer
|
||||
@ -1166,13 +1066,7 @@
|
||||
title: Utilities for Audio processing
|
||||
- local: internal/file_utils
|
||||
title: General Utilities
|
||||
- local: internal/import_utils
|
||||
title: Importing Utilities
|
||||
- local: internal/time_series_utils
|
||||
title: Utilities for Time Series
|
||||
title: Internal helpers
|
||||
- sections:
|
||||
- local: reference/environment_variables
|
||||
title: Environment Variables
|
||||
title: Reference
|
||||
title: API
|
||||
|
@ -75,7 +75,7 @@ training_args = TrainingArguments(
|
||||
per_device_eval_batch_size=16,
|
||||
num_train_epochs=2,
|
||||
fsdp_config="path/to/fsdp_config",
|
||||
fsdp="full_shard",
|
||||
fsdp_strategy="full_shard",
|
||||
weight_decay=0.01,
|
||||
eval_strategy="epoch",
|
||||
save_strategy="epoch",
|
||||
|
@ -1,126 +0,0 @@
|
||||
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# Accelerator selection
|
||||
|
||||
During distributed training, you can specify the number and order of accelerators (CUDA, XPU, MPS, HPU, etc.) to use. This can be useful when you have accelerators with different computing power and you want to use the faster accelerator first. Or you could only use a subset of the available accelerators. The selection process works for both [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html) and [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html). You don't need Accelerate or [DeepSpeed integration](./main_classes/deepspeed).
|
||||
|
||||
This guide will show you how to select the number of accelerators to use and the order to use them in.
|
||||
|
||||
## Number of accelerators
|
||||
|
||||
For example, if there are 4 accelerators and you only want to use the first 2, run the command below.
|
||||
|
||||
<hfoptions id="select-accelerator">
|
||||
<hfoption id="torchrun">
|
||||
|
||||
Use the `--nproc_per_node` to select how many accelerators to use.
|
||||
|
||||
```bash
|
||||
torchrun --nproc_per_node=2 trainer-program.py ...
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="Accelerate">
|
||||
|
||||
Use `--num_processes` to select how many accelerators to use.
|
||||
|
||||
```bash
|
||||
accelerate launch --num_processes 2 trainer-program.py ...
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="DeepSpeed">
|
||||
|
||||
Use `--num_gpus` to select how many GPUs to use.
|
||||
|
||||
```bash
|
||||
deepspeed --num_gpus 2 trainer-program.py ...
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
## Order of accelerators
|
||||
To select specific accelerators to use and their order, use the environment variable appropriate for your hardware. This is often set on the command line for each run, but can also be added to your `~/.bashrc` or other startup config file.
|
||||
|
||||
For example, if there are 4 accelerators (0, 1, 2, 3) and you only want to run accelerators 0 and 2:
|
||||
|
||||
<hfoptions id="accelerator-type">
|
||||
<hfoption id="CUDA">
|
||||
|
||||
```bash
|
||||
CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ...
|
||||
```
|
||||
|
||||
Only GPUs 0 and 2 are "visible" to PyTorch and are mapped to `cuda:0` and `cuda:1` respectively.
|
||||
To reverse the order (use GPU 2 as `cuda:0` and GPU 0 as `cuda:1`):
|
||||
|
||||
|
||||
```bash
|
||||
CUDA_VISIBLE_DEVICES=2,0 torchrun trainer-program.py ...
|
||||
```
|
||||
|
||||
To run without any GPUs:
|
||||
|
||||
```bash
|
||||
CUDA_VISIBLE_DEVICES= python trainer-program.py ...
|
||||
```
|
||||
|
||||
You can also control the order of CUDA devices using `CUDA_DEVICE_ORDER`:
|
||||
|
||||
- Order by PCIe bus ID (matches `nvidia-smi`):
|
||||
|
||||
```bash
|
||||
export CUDA_DEVICE_ORDER=PCI_BUS_ID
|
||||
```
|
||||
|
||||
- Order by compute capability (fastest first):
|
||||
|
||||
```bash
|
||||
export CUDA_DEVICE_ORDER=FASTEST_FIRST
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="Intel XPU">
|
||||
|
||||
```bash
|
||||
ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ...
|
||||
```
|
||||
|
||||
Only XPUs 0 and 2 are "visible" to PyTorch and are mapped to `xpu:0` and `xpu:1` respectively.
|
||||
To reverse the order (use XPU 2 as `xpu:0` and XPU 0 as `xpu:1`):
|
||||
|
||||
```bash
|
||||
ZE_AFFINITY_MASK=2,0 torchrun trainer-program.py ...
|
||||
```
|
||||
|
||||
|
||||
You can also control the order of Intel XPUs with:
|
||||
|
||||
```bash
|
||||
export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
|
||||
```
|
||||
|
||||
For more information about device enumeration and sorting on Intel XPU, please refer to the [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) documentation.
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
|
||||
|
||||
> [!WARNING]
|
||||
> Environment variables can be exported instead of being added to the command line. This is not recommended because it can be confusing if you forget how the environment variable was set up and you end up using the wrong accelerators. Instead, it is common practice to set the environment variable for a specific training run on the same command line.
|
@ -13,7 +13,7 @@ rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# Legacy model contribution
|
||||
# Adding a new model to Transformers
|
||||
|
||||
> [!TIP]
|
||||
> Try adding new models with a more [modular](./modular_transformers) approach first. This makes it significantly easier to contribute a model to Transformers!
|
||||
@ -161,7 +161,7 @@ The downside is that if you aren't used to them, it may take some time to get us
|
||||
Run the command below to start and complete the questionnaire with some basic information about the new model. This command jumpstarts the process by automatically generating some model code that you'll need to adapt.
|
||||
|
||||
```bash
|
||||
transformers add-new-model-like
|
||||
transformers-cli add-new-model-like
|
||||
```
|
||||
|
||||
## Create a pull request
|
||||
@ -292,7 +292,7 @@ Once you're able to run the original checkpoint, you're ready to start adapting
|
||||
|
||||
## Adapt the model code
|
||||
|
||||
The `transformers add-new-model-like` command should have generated a model and configuration file.
|
||||
The `transformers-cli add-new-model-like` command should have generated a model and configuration file.
|
||||
|
||||
- `src/transformers/models/brand_new_llama/modeling_brand_new_llama.py`
|
||||
- `src/transformers/models/brand_new_llama/configuration_brand_new_llama.py`
|
||||
@ -551,10 +551,10 @@ While this example doesn't include an image processor, you may need to implement
|
||||
|
||||
If you do need to implement a new image processor, refer to an existing image processor to understand the expected structure. Slow image processors ([`BaseImageProcessor`]) and fast image processors ([`BaseImageProcessorFast`]) are designed differently, so make sure you follow the correct structure based on the processor type you're implementing.
|
||||
|
||||
Run the following command (only if you haven't already created the fast image processor with the `transformers add-new-model-like` command) to generate the necessary imports and to create a prefilled template for the fast image processor. Modify the template to fit your model.
|
||||
Run the following command (only if you haven't already created the fast image processor with the `transformers-cli add-new-model-like` command) to generate the necessary imports and to create a prefilled template for the fast image processor. Modify the template to fit your model.
|
||||
|
||||
```bash
|
||||
transformers add-fast-image-processor --model-name your_model_name
|
||||
transformers-cli add-fast-image-processor --model-name your_model_name
|
||||
```
|
||||
|
||||
This command will generate the necessary imports and provide a pre-filled template for the fast image processor. You can then modify it to fit your model's needs.
|
||||
@ -571,7 +571,7 @@ The processor should call the appropriate modality-specific processors within it
|
||||
def __call__(
|
||||
self,
|
||||
images: ImageInput = None,
|
||||
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
|
||||
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
|
||||
audio=None,
|
||||
videos=None,
|
||||
**kwargs: Unpack[YourModelProcessorKwargs],
|
||||
|
@ -14,9 +14,284 @@ rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
> [!WARNING]
|
||||
> Agents and tools are being spun out into the standalone [smolagents](https://huggingface.co/docs/smolagents/index) library. These docs will be deprecated in the future!
|
||||
|
||||
# Agents
|
||||
|
||||
(deprecated)
|
||||
[[open-in-colab]]
|
||||
|
||||
An agent is a system where a large language model (LLM) can execute more complex tasks through *planning* and using *tools*.
|
||||
|
||||
- Planning helps a LLM reason its way through a task by breaking it down into smaller subtasks. For example, [`CodeAgent`] plans a series of actions to take and then generates Python code to execute all the actions at once.
|
||||
|
||||
Another planning method is by self-reflection and refinement of its previous actions to improve its performance. The [`ReactJsonAgent`] is an example of this type of planning, and it's based on the [ReAct](https://hf.co/papers/2210.03629) framework. This agent plans and executes actions one at a time based on the feedback it receives from each action.
|
||||
|
||||
- Tools give a LLM access to external functions or APIs that it can use to help it complete a task. For example, [gradio-tools](https://github.com/freddyaboulton/gradio-tools) gives a LLM access to any of the [Gradio](https://www.gradio.app/) apps available on Hugging Face [Spaces](https://hf.co/spaces). These apps can be used for a wide range of tasks such as image generation, video generation, audio transcription, and more.
|
||||
|
||||
To use agents in Transformers, make sure you have the extra `agents` dependencies installed.
|
||||
|
||||
```bash
|
||||
!pip install transformers[agents]
|
||||
```
|
||||
|
||||
Create an agent instance (refer to the [Agents](./main_classes/agent#agents) API for supported agents in Transformers) and a list of tools available for it to use, then [`~ReactAgent.run`] the agent on your task. The example below demonstrates how a ReAct agent reasons through a task.
|
||||
|
||||
```py
|
||||
from transformers import ReactCodeAgent
|
||||
|
||||
agent = ReactCodeAgent(tools=[])
|
||||
agent.run(
|
||||
"How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?",
|
||||
)
|
||||
```
|
||||
|
||||
```bash
|
||||
======== New task ========
|
||||
How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?
|
||||
==== Agent is executing the code below:
|
||||
bert_layers = 12 # BERT base encoder has 12 layers
|
||||
attention_layers = 6 # Encoder in Attention is All You Need has 6 layers
|
||||
layer_diff = bert_layers - attention_layers
|
||||
print("The difference in layers between BERT base encoder and Attention is All You Need is", layer_diff)
|
||||
====
|
||||
Print outputs:
|
||||
The difference in layers between BERT base encoder and Attention is All You Need is 6
|
||||
|
||||
==== Agent is executing the code below:
|
||||
final_answer("BERT base encoder has {} more layers than the encoder from Attention is All You Need.".format(layer_diff))
|
||||
====
|
||||
Print outputs:
|
||||
|
||||
>>> Final answer:
|
||||
BERT base encoder has 6 more layers than the encoder from Attention is All You Need.
|
||||
```
|
||||
|
||||
This guide will walk you through in more detail how to initialize an agent.
|
||||
|
||||
## LLM
|
||||
|
||||
An agent uses a LLM to plan and execute a task; it is the engine that powers the agent. To choose and build your own LLM engine, you need a method that:
|
||||
|
||||
1. the input uses the [chat template](./chat_templating) format, `List[Dict[str, str]]`, and it returns a string
|
||||
2. the LLM stops generating outputs when it encounters the sequences in `stop_sequences`
|
||||
|
||||
```py
|
||||
def llm_engine(messages, stop_sequences=["Task"]) -> str:
|
||||
response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
|
||||
answer = response.choices[0].message.content
|
||||
return answer
|
||||
```
|
||||
|
||||
Next, initialize an engine to load a model. To run an agent locally, create a [`TransformersEngine`] to load a preinitialized [`Pipeline`].
|
||||
|
||||
However, you could also leverage Hugging Face's powerful inference infrastructure, [Inference API](https://hf.co/docs/api-inference/index) or [Inference Endpoints](https://hf.co/docs/inference-endpoints/index), to run your model. This is useful for loading larger models that are typically required for agentic behavior. In this case, load the [`HfApiEngine`] to run the agent.
|
||||
|
||||
The agent requires a list of tools it can use to complete a task. If you aren't using any additional tools, pass an empty list. The default tools provided by Transformers are loaded automatically, but you can optionally set `add_base_tools=True` to explicitly enable them.
|
||||
|
||||
<hfoptions id="engine">
|
||||
<hfoption id="TransformersEngine">
|
||||
|
||||
```py
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TransformersEngine, CodeAgent
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
|
||||
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct").to("cuda")
|
||||
pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
||||
llm_engine = TransformersEngine(pipeline)
|
||||
agent = CodeAgent(tools=[], llm_engine=llm_engine)
|
||||
agent.run(
|
||||
"What causes bread to rise?",
|
||||
)
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="HfApiEngine">
|
||||
|
||||
```py
|
||||
from transformers import CodeAgent, HfApiEngine
|
||||
|
||||
llm_engine = HfApiEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
|
||||
agent = CodeAgent(tools=[], llm_engine=llm_engine)
|
||||
agent.run(
|
||||
"Could you translate this sentence from French, say it out loud and return the audio.",
|
||||
sentence="Où est la boulangerie la plus proche?",
|
||||
)
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
The agent supports [constrained generation](https://hf.co/docs/text-generation-inference/conceptual/guidance) for generating outputs according to a specific structure with the `grammar` parameter. The `grammar` parameter should be specified in the `llm_engine` method or you can set it when initializing an agent.
|
||||
|
||||
Lastly, an agent accepts additional inputs such as text and audio. In the [`HfApiEngine`] example above, the agent accepted a sentence to translate. But you could also pass a path to a local or remote file for the agent to access. The example below demonstrates how to pass a path to an audio file.
|
||||
|
||||
```py
|
||||
from transformers import ReactCodeAgent
|
||||
|
||||
agent = ReactCodeAgent(tools=[], llm_engine=llm_engine)
|
||||
agent.run("Why doesn't he know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3")
|
||||
```
|
||||
|
||||
## System prompt
|
||||
|
||||
A system prompt describes how an agent should behave, a description of the available tools, and the expected output format.
|
||||
|
||||
Tools are defined by the `<<tool_descriptions>>` token which is dynamically replaced during runtime with the actual tool. The tool description is derived from the tool name, description, inputs, output type, and a Jinja2 template. Refer to the [Tools](./tools) guide for more information about how to describe tools.
|
||||
|
||||
The example below is the system prompt for [`ReactCodeAgent`].
|
||||
|
||||
```py
|
||||
You will be given a task to solve as best you can.
|
||||
You have access to the following tools:
|
||||
<<tool_descriptions>>
|
||||
|
||||
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
|
||||
|
||||
At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use.
|
||||
Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '/End code' sequence.
|
||||
During each intermediate step, you can use 'print()' to save whatever important information you will then need.
|
||||
These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step.
|
||||
|
||||
In the end you have to return a final answer using the `final_answer` tool.
|
||||
|
||||
Here are a few examples using notional tools:
|
||||
---
|
||||
{examples}
|
||||
|
||||
Above example were using notional tools that might not exist for you. You only have access to those tools:
|
||||
<<tool_names>>
|
||||
You also can perform computations in the python code you generate.
|
||||
|
||||
Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```<end_code>' sequence. You MUST provide at least the 'Code:' sequence to move forward.
|
||||
|
||||
Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks.
|
||||
Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result.
|
||||
|
||||
Remember to make sure that variables you use are all defined.
|
||||
|
||||
Now Begin!
|
||||
```
|
||||
|
||||
The system prompt can be tailored to the intended task. For example, you can add a better explanation of the output format or you can overwrite the system prompt template entirely with your own custom system prompt as shown below.
|
||||
|
||||
> [!WARNING]
|
||||
> Agents and tools were spun out into the standalone [smolagents](https://huggingface.co/docs/smolagents/index) library. They were removed from `transformers` in v4.52.
|
||||
> If you're writing a custom system prompt, make sure to include `<<tool_descriptions>>` in the template so the agent is aware of the available tools.
|
||||
|
||||
```py
|
||||
from transformers import ReactJsonAgent
|
||||
from transformers.agents import PythonInterpreterTool
|
||||
|
||||
agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
|
||||
```
|
||||
|
||||
## Code execution
|
||||
|
||||
For safety, only the tools you provide (and the default Transformers tools) and the `print` function are executed. The interpreter doesn't allow importing modules that aren't on a safe list.
|
||||
|
||||
To import modules that aren't on the list, add them as a list to the `additional_authorized_imports` parameter when initializing an agent.
|
||||
|
||||
```py
|
||||
from transformers import ReactCodeAgent
|
||||
|
||||
agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
|
||||
agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
|
||||
```
|
||||
|
||||
Code execution stops if a tool isn't on the safe list, it isn't authorized, or if the code generated by the agent returns a Python error.
|
||||
|
||||
> [!WARNING]
|
||||
> A LLM can generate any arbitrary code that can be executed, so don't add any unsafe imports!
|
||||
|
||||
## Multi-agent
|
||||
|
||||
[Multi-agent](https://hf.co/papers/2308.08155) refers to multiple agents working together to solve a task. Performance is typically better because each agent is specialized for a particular subtask.
|
||||
|
||||
Multi-agents are created through a [`ManagedAgent`] class, where a *manager agent* oversees how other agents work together. The manager agent requires an agent and their name and description. These are added to the manager agents system prompt which lets it know how to call and use them.
|
||||
|
||||
The multi-agent example below creates a web search agent that is managed by another [`ReactCodeAgent`].
|
||||
|
||||
```py
|
||||
from transformers.agents import ReactCodeAgent, HfApiEngine, DuckDuckGoSearchTool, ManagedAgent
|
||||
|
||||
llm_engine = HfApiEngine()
|
||||
web_agent = ReactCodeAgent(tools=[DuckDuckGoSearchTool()], llm_engine=llm_engine)
|
||||
managed_web_agent = ManagedAgent(
|
||||
agent=web_agent,
|
||||
name="web_search",
|
||||
description="Runs web searches for you. Give it your query as an argument."
|
||||
)
|
||||
manager_agent = ReactCodeAgent(
|
||||
tools=[], llm_engine=llm_engine, managed_agents=[managed_web_agent]
|
||||
)
|
||||
manager_agent.run("Who is the CEO of Hugging Face?")
|
||||
```
|
||||
|
||||
## Gradio integration
|
||||
|
||||
[Gradio](https://www.gradio.app/) is a library for quickly creating and sharing machine learning apps. The [gradio.Chatbot](https://www.gradio.app/docs/gradio/chatbot) supports chatting with a Transformers agent with the [`stream_to_gradio`] function.
|
||||
|
||||
Load a tool and LLM with an agent, and then create a Gradio app. The key is to use [`stream_to_gradio`] to stream the agents messages and display how it's reasoning through a task.
|
||||
|
||||
```py
|
||||
import gradio as gr
|
||||
from transformers import (
|
||||
load_tool,
|
||||
ReactCodeAgent,
|
||||
HfApiEngine,
|
||||
stream_to_gradio,
|
||||
)
|
||||
|
||||
# Import tool from Hub
|
||||
image_generation_tool = load_tool("m-ric/text-to-image")
|
||||
llm_engine = HfApiEngine("meta-llama/Meta-Llama-3-70B-Instruct")
|
||||
|
||||
# Initialize the agent with the image generation tool
|
||||
agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
|
||||
|
||||
def interact_with_agent(task):
|
||||
messages = []
|
||||
messages.append(gr.ChatMessage(role="user", content=task))
|
||||
yield messages
|
||||
for msg in stream_to_gradio(agent, task):
|
||||
messages.append(msg)
|
||||
yield messages + [
|
||||
gr.ChatMessage(role="assistant", content="⏳ Task not finished yet!")
|
||||
]
|
||||
yield messages
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
text_input = gr.Textbox(lines=1, label="Chat Message", value="Make me a picture of the Statue of Liberty.")
|
||||
submit = gr.Button("Run illustrator agent!")
|
||||
chatbot = gr.Chatbot(
|
||||
label="Agent",
|
||||
type="messages",
|
||||
avatar_images=(
|
||||
None,
|
||||
"https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
|
||||
),
|
||||
)
|
||||
submit.click(interact_with_agent, [text_input], [chatbot])
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo.launch()
|
||||
```
|
||||
|
||||
## Troubleshoot
|
||||
|
||||
For a better idea of what is happening when you call an agent, it is always a good idea to check the system prompt template first.
|
||||
|
||||
```py
|
||||
print(agent.system_prompt_template)
|
||||
```
|
||||
|
||||
If the agent is behaving unexpectedly, remember to explain the task you want to perform as clearly as possible. Every [`~Agent.run`] is different and minor variations in your system prompt may yield completely different results.
|
||||
|
||||
To find out what happened after a run, check the following agent attributes.
|
||||
|
||||
- `agent.logs` stores the finegrained agent logs. At every step of the agents run, everything is stored in a dictionary and appended to `agent.logs`.
|
||||
- `agent.write_inner_memory_from_logs` only stores a high-level overview of the agents run. For example, at each step, it stores the LLM output as a message and the tool call output as a separate message. Not every detail from a step is transcripted by `write_inner_memory_from_logs`.
|
||||
|
||||
## Resources
|
||||
|
||||
Learn more about ReAct agents in the [Open-source LLMs as LangChain Agents](https://hf.co/blog/open-source-llms-as-agents) blog post.
|
||||
|
61
docs/source/en/attention.md
Normal file
61
docs/source/en/attention.md
Normal file
@ -0,0 +1,61 @@
|
||||
<!--Copyright 2023 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# Attention mechanisms
|
||||
|
||||
Most transformer models use full attention in the sense that the attention matrix is square. It can be a big
|
||||
computational bottleneck when you have long texts. Longformer and reformer are models that try to be more efficient and
|
||||
use a sparse version of the attention matrix to speed up training.
|
||||
|
||||
## LSH attention
|
||||
|
||||
[Reformer](model_doc/reformer) uses LSH attention. In the softmax(QK^t), only the biggest elements (in the softmax
|
||||
dimension) of the matrix QK^t are going to give useful contributions. So for each query q in Q, we can consider only
|
||||
the keys k in K that are close to q. A hash function is used to determine if q and k are close. The attention mask is
|
||||
modified to mask the current token (except at the first position), because it will give a query and a key equal (so
|
||||
very similar to each other). Since the hash can be a bit random, several hash functions are used in practice
|
||||
(determined by a n_rounds parameter) and then are averaged together.
|
||||
|
||||
## Local attention
|
||||
|
||||
[Longformer](model_doc/longformer) uses local attention: often, the local context (e.g., what are the two tokens to the
|
||||
left and right?) is enough to take action for a given token. Also, by stacking attention layers that have a small
|
||||
window, the last layer will have a receptive field of more than just the tokens in the window, allowing them to build a
|
||||
representation of the whole sentence.
|
||||
|
||||
Some preselected input tokens are also given global attention: for those few tokens, the attention matrix can access
|
||||
all tokens and this process is symmetric: all other tokens have access to those specific tokens (on top of the ones in
|
||||
their local window). This is shown in Figure 2d of the paper, see below for a sample attention mask:
|
||||
|
||||
<div class="flex justify-center">
|
||||
<img scale="50 %" align="center" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/local_attention_mask.png"/>
|
||||
</div>
|
||||
|
||||
Using those attention matrices with less parameters then allows the model to have inputs having a bigger sequence
|
||||
length.
|
||||
|
||||
## Other tricks
|
||||
|
||||
### Axial positional encodings
|
||||
|
||||
[Reformer](model_doc/reformer) uses axial positional encodings: in traditional transformer models, the positional encoding
|
||||
E is a matrix of size \\(l\\) by \\(d\\), \\(l\\) being the sequence length and \\(d\\) the dimension of the
|
||||
hidden state. If you have very long texts, this matrix can be huge and take way too much space on the GPU. To alleviate
|
||||
that, axial positional encodings consist of factorizing that big matrix E in two smaller matrices E1 and E2, with
|
||||
dimensions \\(l_{1} \times d_{1}\\) and \\(l_{2} \times d_{2}\\), such that \\(l_{1} \times l_{2} = l\\) and
|
||||
\\(d_{1} + d_{2} = d\\) (with the product for the lengths, this ends up being way smaller). The embedding for time
|
||||
step \\(j\\) in E is obtained by concatenating the embeddings for timestep \\(j \% l1\\) in E1 and \\(j // l1\\)
|
||||
in E2.
|
@ -23,13 +23,13 @@ supported models.
|
||||
Most recent models can now switch from one attention function used in the Attention layer to the other, thanks to a simple mapping.
|
||||
By default, we provide the implementation for [`sdpa`](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html),
|
||||
[`flash_attention_2`](https://github.com/Dao-AILab/flash-attention) and [`flex_attention`](https://pytorch.org/docs/stable/nn.attention.flex_attention.html#module-torch.nn.attention.flex_attention)
|
||||
as well as `eager`, which is a simple matrix multiplication without any optimization on top.
|
||||
as well as `eager`, which is simple matrix multiplication without any optimization on top.
|
||||
This is the setting you can usually choose when instantiating a model:
|
||||
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM
|
||||
|
||||
model_id = "meta-llama/Llama-3.2-1B"
|
||||
model_id = "meta-llama/Llama-3.2-1B
|
||||
|
||||
# Here, using flash attention as an example
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="flash_attention_2")
|
||||
@ -43,7 +43,7 @@ from transformers import AutoModelForCausalLM, AttentionInterface
|
||||
from transformers.integrations.sdpa_attention import sdpa_attention_forward
|
||||
import torch
|
||||
|
||||
model_id = "meta-llama/Llama-3.2-1B"
|
||||
model_id = "meta-llama/Llama-3.2-1B
|
||||
|
||||
def my_new_sdpa(*args, **kwargs):
|
||||
print("I just entered the attention computation")
|
||||
@ -56,54 +56,26 @@ model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="my_n
|
||||
model(torch.ones(1, 5, dtype=int))
|
||||
```
|
||||
|
||||
You will see it prints "I just entered the attention computation" as many times as there are layers in the model (with this example, 16 times).
|
||||
You will see it prints "I just entered the attention computation" as many times as there are layers in the model (with this example, 16 times.
|
||||
|
||||
## Dynamically switching attention function
|
||||
|
||||
You could dynamically change the model's attention function as well:
|
||||
You could dynamically change the model's attention function as well, by overriding the `config._attn_implementation` field:
|
||||
|
||||
```python
|
||||
# Back to use original sdpa implementation
|
||||
model.set_attn_implementation("sdpa")
|
||||
model.config._attn_implementation = "sdpa"
|
||||
|
||||
model(torch.ones(1, 5, dtype=int))
|
||||
```
|
||||
|
||||
and it will stop printing the statements, as it now uses the `sdpa` attention.
|
||||
This allows to quickly change an attention function, without needing to reload the model!
|
||||
This allows to quickly change attention function, without needing to reload the model!
|
||||
|
||||
## Different attention per backbone in multimodal models
|
||||
|
||||
For multimodal models different attention functions may work better for each backbone module. For example, some vision backbones perform better in fp32, but are incompatible with FlashAttention. To continue using FlashAttention while keeping the vision encoder in fp32, create a dict and map each config to an attention implementation as shown below.
|
||||
|
||||
```python
|
||||
from transformers import AutoModelForImageTextToText
|
||||
|
||||
model_id = "facebook/chameleon-7b"
|
||||
|
||||
attention_implementation_per_backbone = {"vision_config": "sdpa", "text_config": "flash_attention_2"}
|
||||
model = AutoModelForImageTextToText.from_pretrained(model_id, attn_implementation=attention_implementation_per_backbone)
|
||||
|
||||
# NOTE: keys in the attention implementation have to be the same as the sub-config names
|
||||
for key in attention_implementation_per_backbone:
|
||||
assert key in model.config.sub_configs, f"Invalid key in `attention_implementation`"
|
||||
|
||||
# You can omit certain backbones - the default attention function (SDPA) will be used
|
||||
# This is equivalent to the previous example
|
||||
model = AutoModelForImageTextToText.from_pretrained(model_id, attn_implementation={"text_config": "flash_attention_2"})
|
||||
|
||||
|
||||
# Set the same attention implementation for all backbones with single string, same as in non-multimodal models
|
||||
model = AutoModelForImageTextToText.from_pretrained(model_id, attn_implementation="eager")
|
||||
|
||||
# Alternatively use a dict with an empty key for global configuration
|
||||
model = AutoModelForImageTextToText.from_pretrained(model_id, attn_implementation={"": "eager"})
|
||||
```
|
||||
|
||||
## What about new args needed in my custom attention function?
|
||||
## What about new args needed in my custom function?
|
||||
|
||||
But indeed, what if the new function requires a new arg to be properly used? It's no issue! Models supporting the
|
||||
`AttentionInterface` propagate kwargs all the way to the Attention layers, and to the used attention function. That way,
|
||||
`AttentionInterface` propagates kwargs all the way to the Attention layers, and to the attention function used. That way,
|
||||
you can simply pass the arg (as a kwargs, i.e. you need to qualify the name of the arg) in the model's forward, and it will be correctly used in the attention. However, custom attention functions have some limitations. In particular, it must follow the signature and return format of other attention functions, i.e.
|
||||
|
||||
```python
|
||||
@ -120,7 +92,7 @@ def custom_attention(
|
||||
a_new_kwargs = None, # You can now add as many kwargs as you need
|
||||
another_new_kwargs = None, # You can now add as many kwargs as you need
|
||||
**kwargs, # You need to accept **kwargs as models will pass other args
|
||||
) -> tuple[torch.Tensor, Optional[torch.Tensor]]
|
||||
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]
|
||||
... # do your magic!
|
||||
return attn_output, attn_weights # attn_weights are optional here
|
||||
|
||||
@ -131,66 +103,4 @@ model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="cust
|
||||
model(torch.ones(1, 5, dtype=int), a_new_kwargs=..., another_new_kwargs=...)
|
||||
```
|
||||
|
||||
If in doubt about what args/kwargs a given model sends to the attention function, simply check that model's modeling code on [GitHub](https://github.com/huggingface/transformers/tree/main/src/transformers/models)!
|
||||
|
||||
## Accessing current available implementations
|
||||
|
||||
Most of the time, you will simply need to `register` a new function. If, however, you need to access an existing one,
|
||||
and/or perform a few checks, the preferred way is to use the global `ALL_ATTENTION_FUNCTIONS`. It behaves the same way you
|
||||
would expect from a usual Python dictionary:
|
||||
|
||||
```python
|
||||
>>> from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
|
||||
|
||||
>>> list(ALL_ATTENTION_FUNCTIONS.keys())
|
||||
>>> ['flash_attention_2', 'flex_attention', 'sdpa']
|
||||
|
||||
>>> ALL_ATTENTION_FUNCTIONS["sdpa"]
|
||||
>>> <function transformers.integrations.sdpa_attention.sdpa_attention_forward>
|
||||
|
||||
>>> ALL_ATTENTION_FUNCTIONS.get("sdpa", None)
|
||||
>>> <function transformers.integrations.sdpa_attention.sdpa_attention_forward>
|
||||
|
||||
# You can also globally `register` a new function directly on it
|
||||
>>> ALL_ATTENTION_FUNCTIONS.register("new_func", new_func)
|
||||
```
|
||||
|
||||
## Attention Mask Interface
|
||||
|
||||
Having a new attention function may mean that you need a new format of attention mask to decide what key and value tokens
|
||||
the query tokens should attend to. This is now possible with the `AttentionMaskInterface`! It works in the same way as
|
||||
the `AttentionInterface`:
|
||||
|
||||
```python
|
||||
from transformers import AttentionMaskInterface
|
||||
from transformers.masking_utils import sdpa_mask
|
||||
import torch
|
||||
|
||||
def my_new_sdpa_mask(*args, **kwargs):
|
||||
print("I just entered the attention mask computation")
|
||||
return sdpa_mask(*args, **kwargs)
|
||||
|
||||
AttentionMaskInterface.register("my_new_sdpa_mask", my_new_sdpa_mask)
|
||||
```
|
||||
|
||||
The reason you have to register it is because we need to automatically correct your mask format based on the attention implementation (for example, flex attention uses a BlockMask format, while sdpa uses a 4D tensor).
|
||||
By default, if you do not register an attention mask function along with your attention function, mask creation will be skipped
|
||||
and `attention_mask=None` will be passed along to the Attention layers.
|
||||
|
||||
The default signature of the attention mask functions is the following:
|
||||
|
||||
```python
|
||||
def custom_attention_mask(
|
||||
batch_size: int, # required arg
|
||||
cache_position: torch.Tensor, # required arg
|
||||
kv_length: int, # required arg
|
||||
kv_offset: int = 0, # required arg
|
||||
mask_function: Callable = causal_mask_function, # required arg
|
||||
attention_mask: Optional[torch.Tensor] = None, # required arg
|
||||
**kwargs, # a few additional args may be passed as kwargs, especially the model's config is always passed
|
||||
) -> Optional[torch.Tensor]:
|
||||
```
|
||||
|
||||
It mostly works thanks to the `mask_function`, which is a `Callable` in the form of [torch's mask_mod functions](https://pytorch.org/blog/flexattention/), taking 4 indices as input and returning a boolean to indicate if this position should take part in the attention computation.
|
||||
|
||||
If you cannot use the `mask_function` to create your mask for some reason, you can try to work around it by doing something similar to our [torch export workaround](https://github.com/huggingface/transformers/blob/main/src/transformers/integrations/executorch.py).
|
||||
If in doubt about what args/kwargs a given model sends to the attention function, simply check that model's modeling code on [GitHub](https://github.com/huggingface/transformers/tree/main/src/transformers/models)!
|
@ -1,307 +0,0 @@
|
||||
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# Documenting a model
|
||||
|
||||
The `@auto_docstring` decorator in Transformers generates consistent docstrings for model classes and their methods. It reduces boilerplate by automatically including standard argument descriptions while also allowing overrides to add new or custom arguments. [Contributing a new model](./modular_transformers) is easier because you don't need to manually add the standard docstrings, and only focus on documenting new arguments.
|
||||
|
||||
This guide describes how to use the `@auto_docstring` decorator and how it works.
|
||||
|
||||
## @auto_docstring
|
||||
|
||||
Start by importing the decorator in the modeling file (`modular_model.py` or `modeling_model.py`).
|
||||
|
||||
```python
|
||||
from ...utils import auto_docstring
|
||||
```
|
||||
|
||||
Select whether you'd like to apply `@auto_docstring` to a class or function below to see how to use it.
|
||||
|
||||
<hfoptions id="type">
|
||||
<hfoption id="classes">
|
||||
|
||||
Place `@auto_docstring` directly above the class definition. The decorator derives parameter descriptions from the `__init__` method's signature and docstring.
|
||||
|
||||
```python
|
||||
from transformers.modeling_utils import PreTrainedModel
|
||||
from ...utils import auto_docstring
|
||||
|
||||
@auto_docstring
|
||||
class MyAwesomeModel(PreTrainedModel):
|
||||
def __init__(self, config, custom_parameter: int = 10, another_custom_arg: str = "default"):
|
||||
r"""
|
||||
custom_parameter (`int`, *optional*, defaults to 10):
|
||||
Description of the custom_parameter for MyAwesomeModel.
|
||||
another_custom_arg (`str`, *optional*, defaults to "default"):
|
||||
Documentation for another unique argument.
|
||||
"""
|
||||
super().__init__(config)
|
||||
self.custom_parameter = custom_parameter
|
||||
self.another_custom_arg = another_custom_arg
|
||||
# ... rest of your init
|
||||
|
||||
# ... other methods
|
||||
```
|
||||
|
||||
Arguments can also be passed directly to `@auto_docstring` for more control. Use the `custom_intro` parameter to describe the argument and the `custom_args` parameter to describe the arguments.
|
||||
|
||||
```python
|
||||
@auto_docstring(
|
||||
custom_intro="""This model performs specific synergistic operations.
|
||||
It builds upon the standard Transformer architecture with unique modifications.""",
|
||||
custom_args="""
|
||||
custom_parameter (`type`, *optional*, defaults to `default_value`):
|
||||
A concise description for custom_parameter if not defined or overriding the description in `auto_docstring.py`.
|
||||
internal_helper_arg (`type`, *optional*, defaults to `default_value`):
|
||||
A concise description for internal_helper_arg if not defined or overriding the description in `auto_docstring.py`.
|
||||
"""
|
||||
)
|
||||
class MySpecialModel(PreTrainedModel):
|
||||
def __init__(self, config: ConfigType, custom_parameter: "type" = "default_value", internal_helper_arg=None):
|
||||
# ...
|
||||
```
|
||||
|
||||
You can also choose to only use `custom_intro` and define the custom arguments directly in the class.
|
||||
|
||||
```python
|
||||
@auto_docstring(
|
||||
custom_intro="""This model performs specific synergistic operations.
|
||||
It builds upon the standard Transformer architecture with unique modifications.""",
|
||||
)
|
||||
class MySpecialModel(PreTrainedModel):
|
||||
def __init__(self, config: ConfigType, custom_parameter: "type" = "default_value", internal_helper_arg=None):
|
||||
r"""
|
||||
custom_parameter (`type`, *optional*, defaults to `default_value`):
|
||||
A concise description for custom_parameter if not defined or overriding the description in `auto_docstring.py`.
|
||||
internal_helper_arg (`type`, *optional*, defaults to `default_value`):
|
||||
A concise description for internal_helper_arg if not defined or overriding the description in `auto_docstring.py`.
|
||||
"""
|
||||
# ...
|
||||
```
|
||||
|
||||
You should also use the `@auto_docstring` decorator for classes that inherit from [`~utils.ModelOutput`].
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
@auto_docstring(
|
||||
custom_intro="""
|
||||
Custom model outputs with additional fields.
|
||||
"""
|
||||
)
|
||||
class MyModelOutput(ImageClassifierOutput):
|
||||
r"""
|
||||
loss (`torch.FloatTensor`, *optional*):
|
||||
The loss of the model.
|
||||
custom_field (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*):
|
||||
A custom output field specific to this model.
|
||||
"""
|
||||
|
||||
# Standard fields like hidden_states, logits, attentions etc. can be automatically documented if the description is the same as the standard arguments.
|
||||
# However, given that the loss docstring is often different per model, you should document it in the docstring above.
|
||||
loss: Optional[torch.FloatTensor] = None
|
||||
logits: Optional[torch.FloatTensor] = None
|
||||
hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None
|
||||
attentions: Optional[tuple[torch.FloatTensor, ...]] = None
|
||||
# Custom fields need to be documented in the docstring above
|
||||
custom_field: Optional[torch.FloatTensor] = None
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="functions">
|
||||
|
||||
Place `@auto_docstring` directly above the method definition. The decorator derives parameter descriptions from the function signature.
|
||||
|
||||
```python
|
||||
@auto_docstring
|
||||
def forward(
|
||||
self,
|
||||
input_ids: Optional[torch.Tensor] = None,
|
||||
attention_mask: Optional[torch.Tensor] = None,
|
||||
new_custom_argument: Optional[torch.Tensor] = None,
|
||||
arg_documented_in_args_doc: Optional[torch.Tensor] = None,
|
||||
# ... other arguments
|
||||
) -> Union[Tuple, ModelOutput]: # The description of the return value will automatically be generated from the ModelOutput class docstring.
|
||||
r"""
|
||||
new_custom_argument (`torch.Tensor`, *optional*):
|
||||
Description of this new custom argument and its expected shape or type.
|
||||
"""
|
||||
# ...
|
||||
```
|
||||
|
||||
Arguments can also be passed directly to `@auto_docstring` for more control. Use the `custom_intro` parameter to describe the argument and the `custom_args` parameter to describe the arguments.
|
||||
|
||||
The `Returns` and `Examples` parts of the docstring can also be manually specified.
|
||||
|
||||
|
||||
```python
|
||||
MODEL_COMMON_CUSTOM_ARGS = r"""
|
||||
common_arg_1 (`torch.Tensor`, *optional*, defaults to `default_value`):
|
||||
Description of common_arg_1
|
||||
common_arg_2 (`torch.Tensor`, *optional*, defaults to `default_value`):
|
||||
Description of common_arg_2
|
||||
...
|
||||
"""
|
||||
|
||||
class MyModel(PreTrainedModel):
|
||||
# ...
|
||||
@auto_docstring(
|
||||
custom_intro="""
|
||||
This is a custom introduction for the function.
|
||||
"""
|
||||
custom_args=MODEL_COMMON_CUSTOM_ARGS
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
input_ids: Optional[torch.Tensor] = None,
|
||||
attention_mask: Optional[torch.Tensor] = None,
|
||||
common_arg_1: Optional[torch.Tensor] = None,
|
||||
common_arg_2: Optional[torch.Tensor] = None,
|
||||
#...
|
||||
function_specific_argument: Optional[torch.Tensor] = None,
|
||||
# ... other arguments
|
||||
) -> torch.Tensor:
|
||||
r"""
|
||||
function_specific_argument (`torch.Tensor`, *optional*):
|
||||
Description of an argument specific to this function
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`: For a function returning a generic type, a custom "Returns" section can be specified.
|
||||
|
||||
Example:
|
||||
|
||||
(To override the default example with a custom one or to add an example for a model class that does not have a pipeline)
|
||||
|
||||
```python
|
||||
...
|
||||
```
|
||||
"""
|
||||
# ...
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
## Documenting arguments
|
||||
|
||||
There are some rules for documenting different types of arguments and they're listed below.
|
||||
|
||||
- Standard arguments (`input_ids`, `attention_mask`, `pixel_values`, etc.) are defined and retrieved from `auto_docstring.py`. It is the single source of truth for standard arguments and should not be redefined locally if an argument's description and shape is the same as an argument in `auto_docstring.py`.
|
||||
|
||||
If a standard argument behaves differently in your model, then you can override it locally in a `r""" """` block. This local definition has a higher priority. For example, the `labels` argument is often customized per model and typically requires overriding.
|
||||
|
||||
|
||||
- New or custom arguments should be documented within an `r""" """` block after the signature if it is a function or in the `__init__` method's docstring if it is a class.
|
||||
|
||||
```py
|
||||
argument_name (`type`, *optional*, defaults to `X`):
|
||||
Description of the argument.
|
||||
Explain its purpose, expected shape/type if complex, and default behavior.
|
||||
This can span multiple lines.
|
||||
```
|
||||
|
||||
* Include `type` in backticks.
|
||||
* Add *optional* if the argument is not required or has a default value.
|
||||
* Add "defaults to X" if it has a default value. You don't need to add "defaults to `None`" if the default value is `None`.
|
||||
|
||||
These arguments can also be passed to `@auto_docstring` as a `custom_args` argument. It is used to define the docstring block for new arguments once if they are repeated in multiple places in the modeling file.
|
||||
|
||||
```py
|
||||
class MyModel(PreTrainedModel):
|
||||
# ...
|
||||
@auto_docstring(
|
||||
custom_intro="""
|
||||
This is a custom introduction for the function.
|
||||
"""
|
||||
custom_args=r"""
|
||||
common_arg_1 (`torch.Tensor`, *optional*, defaults to `default_value`):
|
||||
Description of common_arg_1
|
||||
"""
|
||||
)
|
||||
```
|
||||
|
||||
## Checking the docstrings
|
||||
|
||||
Transformers includes a utility script to validate the docstrings when you open a Pull Request which triggers CI (continuous integration) checks. The script checks for the following criteria.
|
||||
|
||||
* Ensures `@auto_docstring` is applied to relevant mode classes and public methods.
|
||||
* Ensures arguments are complete and consistent. It checks that documented arguments exist in the signature and verifies whether the types and default values in the docstring match the signature. Arguments that aren't known standard arguments or if they lack a local description are flagged.
|
||||
* Reminds you to complete placeholders like `<fill_type>` and `<fill_docstring>`.
|
||||
* Ensures docstrings are formatted according to the expected docstring style.
|
||||
|
||||
You can run this check locally - before committing - by running the following command.
|
||||
|
||||
```bash
|
||||
make fix-copies
|
||||
```
|
||||
|
||||
`make fix-copies` runs several other checks as well. If you don't need those checks, run the command below to only perform docstring and auto-docstring checks.
|
||||
|
||||
```bash
|
||||
python utils/check_docstrings.py # to only check files included in the diff without fixing them
|
||||
# python utils/check_docstrings.py --fix_and_overwrite # to fix and overwrite the files in the diff
|
||||
# python utils/check_docstrings.py --fix_and_overwrite --check_all # to fix and overwrite all files
|
||||
```
|
||||
|
||||
## modular_model.py files
|
||||
|
||||
When working with modular files (`modular_model.py`), follow the guidelines below for applying `@auto_docstring`.
|
||||
|
||||
- For standalone models in modular files, apply `@auto_docstring` like you would in a `modeling_model.py` file.
|
||||
- For models that inherit from other library models, `@auto_docstring` is automatically carried over to the generated modeling file. You don't need to add `@auto_docstring` in your modular file.
|
||||
|
||||
If you need to modify the `@auto_docstring` behavior, apply the customized decorator in your modular file. Make sure to **include all other decorators** that are present in the original function or class.
|
||||
|
||||
> [!WARNING]
|
||||
> When overriding any decorator in a modular file, you must include **all** decorators that were applied to that function or class in the parent model. If you only override some decorators, the others won't be included in the generated modeling file.
|
||||
|
||||
## How it works
|
||||
|
||||
The `@auto_docstring` decorator automatically generates docstrings by:
|
||||
|
||||
1. Inspecting the signature (arguments, types, defaults) of the decorated class' `__init__` method or the decorated function.
|
||||
2. Retrieving the predefined docstrings for common arguments (`input_ids`, `attention_mask`, etc.) from internal library sources like [`ModelArgs`], [`ImageProcessorArgs`], and the `auto_docstring.py` file.
|
||||
3. Adding argument descriptions in one of two ways as shown below.
|
||||
|
||||
| method | description | usage |
|
||||
|---|---|---|
|
||||
| `r""" """` | add custom docstring content directly to a method signature or within the `__init__` docstring | document new arguments or override standard descriptions |
|
||||
| `custom_args` | add custom docstrings for specific arguments directly in `@auto_docstring` | define docstring for new arguments once if they're repeated in multiple places in the modeling file |
|
||||
|
||||
4. Adding class and function descriptions. For model classes with standard naming patterns, like `ModelForCausalLM`, or if it belongs to a pipeline, `@auto_docstring` automatically generates the appropriate descriptions with `ClassDocstring` from `auto_docstring.py`.
|
||||
|
||||
`@auto_docstring` also accepts the `custom_intro` argument to describe a class or function.
|
||||
|
||||
5. Using a templating system to allow predefined docstrings to include dynamic information from Transformers' [auto_modules](https://github.com/huggingface/transformers/tree/main/src/transformers/models/auto) such as `{{processor_class}}` and `{{config_class}}`.
|
||||
|
||||
6. Finding appropriate usage examples based on the model's task or pipeline compatibility. It extracts checkpoint information form the model's configuration class to provide concrete examples with real model identifiers.
|
||||
|
||||
7. Adding return values to the docstring. For methods like `forward`, the decorator automatically generates the `Returns` field in the docstring based on the method's return type annotation.
|
||||
|
||||
For example, if a method returns a [`~transformers.utils.ModelOutput`] subclass, `@auto_docstring` extracts the field descriptions from the class' docstring to create a comprehensive return value description. You can also manually specifiy a custom `Returns` field in a functions docstring.
|
||||
|
||||
8. Unrolling kwargs typed with the unpack operator. For specific methods (defined in `UNROLL_KWARGS_METHODS`) or classes (defined in `UNROLL_KWARGS_CLASSES`), the decorator processes `**kwargs` parameters that are typed with `Unpack[KwargsTypedDict]`. It extracts the documentations from the `TypedDict` and adds each parameter to the function's docstring.
|
||||
|
||||
Currently only supported for [`FastImageProcessorKwargs`].
|
||||
|
||||
## Best practices
|
||||
|
||||
Follow the best practices below to help maintain consistent and informative documentation for Transformers!
|
||||
|
||||
* Use `@auto_docstring` for new PyTorch model classes ([`PreTrainedModel`] subclasses) and their primary methods like `forward` or `get_text_features`.
|
||||
* For classes, `@auto_docstring` retrieves parameter descriptions from the `__init__` method's docstring.
|
||||
* Rely on standard docstrings and do not redefine common arguments unless their behavior is different in your model.
|
||||
* Document new or custom arguments clearly.
|
||||
* Run `check_docstrings` locally and iteratively.
|
@ -15,7 +15,8 @@ rendered properly in your Markdown viewer.
|
||||
-->
|
||||
|
||||
# Caching
|
||||
Imagine you're having a conversation with someone, and instead of remembering what they previously said, they have to start from scratch every time you respond. This would be slow and inefficient, right?
|
||||
|
||||
Imagine you’re having a conversation with someone, and instead of remembering what they previously said, they have to start from scratch every time you respond. This would be slow and inefficient, right?
|
||||
|
||||
You can extend this analogy to transformer models. Autoregressive model generation can be slow because it makes a prediction one token at a time. Each new prediction is dependent on all the previous context.
|
||||
|
||||
@ -28,50 +29,8 @@ A key-value (KV) cache eliminates this inefficiency by storing kv pairs derived
|
||||
> [!WARNING]
|
||||
> Caching should only be used for **inference**. It may cause unexpected errors if it's enabled during training.
|
||||
|
||||
To better understand how and why caching works, let's take a closer look at the structure of the attention matrices.
|
||||
|
||||
## Attention matrices
|
||||
|
||||
The **scaled dot-product attention** is calculated as shown below for a batch of size `b`, number of attention heads `h`, sequence length so far `T`, and dimension per attention head `d_head`.
|
||||
|
||||
$$
|
||||
\text{Attention}(Q, K, V) = \text{softmax}\left( \frac{Q K^\top}{\sqrt{d_{\text{head}}}} \times \text{mask} \right) V
|
||||
$$
|
||||
|
||||
The query (`Q`), key (`K`), and value (`V`) matrices are projections from the input embeddings of shape `(b, h, T, d_head)`.
|
||||
|
||||
For causal attention, the mask prevents the model from attending to future tokens. Once a token is processed, its representation never changes with respect to future tokens, which means \\( K_{\text{past}} \\) and \\( V_{\text{past}} \\) can be cached and reused to compute the last token's representation.
|
||||
|
||||
$$
|
||||
\text{Attention}(q_t, [\underbrace{k_1, k_2, \dots, k_{t-1}}_{\text{cached}}, k_{t}], [\underbrace{v_1, v_2, \dots, v_{t-1}}_{\text{cached}}, v_{t}])
|
||||
$$
|
||||
|
||||
At inference time, you only need the last token's query to compute the representation \\( x_t \\) that predicts the next token \\( t+1 \\). At each step, the new key and value vectors are **stored** in the cache and **appended** to the past keys and values.
|
||||
|
||||
$$
|
||||
K_{\text{cache}} \leftarrow \text{concat}(K_{\text{past}}, k_t), \quad V_{\text{cache}} \leftarrow \text{concat}(V_{\text{past}}, v_t)
|
||||
$$
|
||||
|
||||
Attention is calculated independently in each layer of the model, and caching is done on a per-layer basis.
|
||||
|
||||
Refer to the table below to compare how caching improves efficiency.
|
||||
|
||||
| without caching | with caching |
|
||||
|---|---|
|
||||
| for each step, recompute all previous `K` and `V` | for each step, only compute current `K` and `V`
|
||||
| attention cost per step is **quadratic** with sequence length | attention cost per step is **linear** with sequence length (memory grows linearly, but compute/token remains low) |
|
||||
|
||||
|
||||
|
||||
## Cache class
|
||||
|
||||
A basic KV cache interface takes a key and value tensor for the current token and returns the updated `K` and `V` tensors. This is internally managed by a model's `forward` method.
|
||||
|
||||
```py
|
||||
new_K, new_V = cache.update(k_t, v_t, layer_idx)
|
||||
attn_output = attn_layer_idx_fn(q_t, new_K, new_V)
|
||||
```
|
||||
|
||||
When you use Transformers' [`Cache`] class, the self-attention module performs several critical steps to integrate past and present information.
|
||||
|
||||
1. The attention module concatenates current kv pairs with past kv pairs stored in the cache. This creates attentions weights with the shape `(new_tokens_length, past_kv_length + new_tokens_length)`. The current and past kv pairs are essentially combined to compute the attention scores, ensuring a model is aware of previous context and the current input.
|
||||
@ -80,21 +39,6 @@ When you use Transformers' [`Cache`] class, the self-attention module performs s
|
||||
|
||||
3. It is also important to be aware of the `cache_position`. This is important if you want to reuse a prefilled [`Cache`] with the `forward` method because you have to pass a valid `cache_position` value. This indicates the input positions in a sequence. `cache_position` is unaffected by padding, and it always adds one more position for each token. For example, if a kv cache contains 10 tokens - regardless of pad tokens - the cache position for the next token should be `torch.tensor([10])`.
|
||||
|
||||
## Cache storage implementation
|
||||
|
||||
Caches are structured as a list of layers, where each layer contains a key and value cache. The key and value caches are tensors with the shape `[batch_size, num_heads, seq_len, head_dim]`.
|
||||
|
||||
Layers can be of different types (e.g. `DynamicLayer`, `StaticLayer`, `SlidingWindowLayer`), which mostly changes how sequence length is handled and how the cache is updated.
|
||||
|
||||
The simplest is a `DynamicLayer` that grows as more tokens are processed. The sequence length dimension (`seq_len`) increases with each new token:
|
||||
|
||||
```py
|
||||
cache.layers[idx].keys = torch.cat([cache.layers[idx].keys, key_states], dim=-2)
|
||||
cache.layers[idx].values = torch.cat([cache.layers[idx].values, value_states], dim=-2)
|
||||
```
|
||||
|
||||
Other layer types like `StaticLayer` and `SlidingWindowLayer` have a fixed sequence length that is set when the cache is created. This makes them compatible with `torch.compile`. In the case of `SlidingWindowLayer`, existing tokens are shifted out of the cache when a new token is added.
|
||||
|
||||
The example below demonstrates how to create a generation loop with [`DynamicCache`]. As discussed, the attention mask is a concatenation of past and current token values and `1` is added to the cache position for the next token.
|
||||
|
||||
```py
|
||||
@ -129,43 +73,11 @@ print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])
|
||||
"[INST] Hello, what's your name. [/INST] Hello! My name is LLaMA,"
|
||||
```
|
||||
|
||||
## Cache position
|
||||
|
||||
The cache position tracks where to insert new tokens in the attention cache. It represents the *absolute* position of each token in the context, independent of padding or batch structure. Suppose you already cached `N` tokens and are now processing `K` new tokens. The cache position for the new tokens will range from `N` to `N + K - 1`. In other words, you're processing tokens at positions - `[N, N + 1, N + 2, ..., N + K - 1]`.
|
||||
|
||||
Cache position is used internally for two purposes:
|
||||
|
||||
1. Selecting new tokens to process in the input sequence and ensuring only tokens that haven’t been cached yet are passed to the model's `forward`.
|
||||
2. Storing key/value pairs at the correct positions in the cache. This is especially important for fixed-size caches, like [`StaticCache`], that pre-allocates a specific cache length.
|
||||
|
||||
The generation loop usually takes care of the cache position, but if you're writing a custom generation method, it is important that cache positions are accurate since they are used to write and read key/value states into fixed slots.
|
||||
|
||||
|
||||
```py
|
||||
import torch
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache
|
||||
|
||||
model_id = "meta-llama/Llama-2-7b-chat-hf"
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda:0")
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
messages = [{"role": "user", "content": "You are a helpful assistant."}]
|
||||
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt", return_dict=True).to("cuda:0")
|
||||
generated_ids = model.generate(**inputs, use_cache=True, max_new_tokens=10)
|
||||
|
||||
```
|
||||
|
||||
|
||||
## Legacy cache format
|
||||
|
||||
Before the [`Cache`] class, the cache used to be stored as a tuple of tuples of tensors. This format is dynamic because it grows as text is generated, similar to [`DynamicCache`].
|
||||
Before the [`Cache`] class, the cache used to be stored as a tuple of tuples of tensors. This format has is dynamic because it grows as text is generated, similar to [`DynamicCache`].
|
||||
|
||||
The legacy format is essentially the same data structure but organized differently.
|
||||
- It's a tuple of tuples, where each inner tuple contains the key and value tensors for a layer.
|
||||
- The tensors have the same shape `[batch_size, num_heads, seq_len, head_dim]`.
|
||||
- The format is less flexible and doesn't support features like quantization or offloading.
|
||||
|
||||
If your project depends on this legacy format, we recommend to convert to [`DynamicCache`] with [`~DynamicCache.from_legacy_cache`]. Note that legacy cache format is deprecated and not used anymore in `Transformers`. You can convert back to tuple format with [`DynamicCache.to_legacy_cache`] functions, which is helpful if you have custom logic for manipulating a cache in a specific format.
|
||||
If your project depends on this legacy format, you can convert between [`DynamicCache`] and a tuple of tuples as shown below with the [`~DynamicCache.from_legacy_cache`] and [`DynamicCache.to_legacy_cache`] functions. This is helpful if you have custom logic for manipulating a cache in a specific format.
|
||||
|
||||
```py
|
||||
import torch
|
||||
@ -181,4 +93,4 @@ generation_outputs = model.generate(**inputs, return_dict_in_generate=True, retu
|
||||
|
||||
cache = DynamicCache.from_legacy_cache(generation_outputs.past_key_values)
|
||||
legacy_format_cache = cache.to_legacy_cache()
|
||||
```
|
||||
```
|
@ -56,7 +56,7 @@ Create a [`ImageTextToTextPipeline`] and pass the chat to it. For large models,
|
||||
import torch
|
||||
from transformers import pipeline
|
||||
|
||||
pipeline = pipeline("image-text-to-text", model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf", device_map="auto", torch_dtype=torch.float16)
|
||||
pipeline = pipeline("image-text-to-text", model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf", device="cuda", torch_dtype=torch.float16)
|
||||
pipeline(text=messages, max_new_tokens=50, return_full_text=False)
|
||||
[{'input_text': [{'role': 'system',
|
||||
'content': [{'type': 'text',
|
||||
@ -175,7 +175,36 @@ processed_chat = processor.apply_chat_template(
|
||||
add_generation_prompt=True,
|
||||
tokenize=True,
|
||||
return_dict=True,
|
||||
video_fps=16,
|
||||
video_fps=32,
|
||||
video_load_backend="decord",
|
||||
)
|
||||
print(processed_chat.keys())
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="custom frame sampling">
|
||||
|
||||
Some models don't sample frames *uniformly* and require more complex logic to determine which frames to use. For example, the model may have an *adaptive frame selection* or if the model prioritizes *key moments* in a video rather than evenly spaced frames.
|
||||
|
||||
If a model has a different sampling strategy, you can write a function that customizes frame selection. The function should include the following requirements.
|
||||
|
||||
- Use the `sample_indices_fn` parameter to pass a callable function for sampling.
|
||||
- If provided, this function *overrides* the standard `num_frames` and `fps` parameters.
|
||||
- The function receives all the parameters passed to `load_video` and must return valid frame indices to sample from.
|
||||
|
||||
An example function is shown below. This gives you full control over frame selection, making the model more adaptable to different video scenarios.
|
||||
|
||||
```py
|
||||
def sample_indices_fn(metadata, **kwargs):
|
||||
# samples only the first and the second frame
|
||||
return [0, 1]
|
||||
|
||||
processed_chat = processor.apply_chat_template(
|
||||
messages,
|
||||
add_generation_prompt=True,
|
||||
tokenize=True,
|
||||
return_dict=True,
|
||||
sample_indices_fn=sample_indices_fn,
|
||||
video_load_backend="decord",
|
||||
)
|
||||
print(processed_chat.keys())
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user