Mirror of https://github.com/huggingface/transformers.git (synced 2025-10-22 10:19:00 +08:00)

Compare commits: check_doc_… ... multiple-m… (25 commits)
| SHA1 |
|---|
| 9ef4d0374e |
| 60226c6ff3 |
| 0863eef248 |
| 1a81d774b1 |
| 9f51dc2535 |
| 9b479a245b |
| 8ee50537fe |
| 8eaae6bee9 |
| 07182b2e10 |
| 4d2de5f63c |
| c3ba53303b |
| e6cc410d5b |
| fdcfdbfd22 |
| 626666c444 |
| 429f1a682d |
| dae8708c36 |
| 3e970dbbf1 |
| 77aa9fc076 |
| 55493f1390 |
| c877c9fa5b |
| 7ec35bc3bd |
| dad513e0c2 |
| 936aeb70ab |
| 23d6095e8f |
| fae0f3dde8 |
@@ -28,7 +28,6 @@ COMMON_ENV_VARIABLES = {
    "TRANSFORMERS_IS_CI": True,
    "PYTEST_TIMEOUT": 120,
    "RUN_PIPELINE_TESTS": False,
-   "RUN_PT_TF_CROSS_TESTS": False,
    "RUN_PT_FLAX_CROSS_TESTS": False,
}
# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
@@ -177,15 +176,6 @@ class CircleCIJob:

# JOBS
-torch_and_tf_job = CircleCIJob(
-    "torch_and_tf",
-    docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
-    additional_env={"RUN_PT_TF_CROSS_TESTS": True},
-    marker="is_pt_tf_cross_test",
-    pytest_options={"rA": None, "durations": 0},
-)

torch_and_flax_job = CircleCIJob(
    "torch_and_flax",
    additional_env={"RUN_PT_FLAX_CROSS_TESTS": True},
@@ -353,7 +343,7 @@ doc_test_job = CircleCIJob(
    pytest_num_workers=1,
)

-REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job]  # fmt: skip
+REGULAR_TESTS = [torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job]  # fmt: skip
EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
REPO_UTIL_TESTS = [repo_utils_job]
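The PT/TF cross-test job is dropped here, but the PT/Flax cross tests remain and are gated by both the `RUN_PT_FLAX_CROSS_TESTS` variable and the `is_pt_flax_cross_test` marker defined in conftest.py. As a rough local sketch (the test path is only an example, not taken from this diff):

```bash
# Sketch: run the remaining PT/Flax cross tests locally.
# The env var and marker come from the CircleCI config and conftest.py shown in this compare;
# the test file below is only an illustrative choice.
RUN_PT_FLAX_CROSS_TESTS=1 python -m pytest -m is_pt_flax_cross_test -v tests/models/bert/test_modeling_flax_bert.py
```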
.github/workflows/build-docker-images.yml (vendored, 344 changed lines)
@@ -3,7 +3,7 @@ name: Build docker images (scheduled)
on:
  push:
    branches:
      - check_doc_image
      - build_ci_docker_image*
  repository_dispatch:
  workflow_call:
    inputs:
@@ -18,6 +18,132 @@ concurrency:
  cancel-in-progress: false

jobs:
  latest-docker:
    name: "Latest PyTorch + TensorFlow [dev]"
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-all-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
      # Push CI images still need to be re-built daily
      -
        name: Build and push (for Push CI) in a daily basis
        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
        if: inputs.image_postfix != '-push-ci'
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-all-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-all-latest-gpu-push-ci

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  latest-torch-deepspeed-docker:
    name: "Latest PyTorch + DeepSpeed"
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
  latest-torch-deepspeed-docker-for-push-ci-daily-build:
    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      # Push CI images still need to be re-built daily
      -
        name: Build and push (for Push CI) in a daily basis
        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
        if: inputs.image_postfix != '-push-ci'
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  doc-builder:
    name: "Doc builder"
    # Push CI doesn't need this image
@@ -50,6 +176,218 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the huggingface/transformers-doc-builder docker build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  latest-pytorch:
    name: "Latest PyTorch [dev]"
    # Push CI doesn't need this image
    if: inputs.image_postfix != '-push-ci'
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-gpu

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the huggingface/transformers-pytorch-gpu docker build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  latest-pytorch-amd:
    name: "Latest PyTorch (AMD) [dev]"
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-amd-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
      # Push CI images still need to be re-built daily
      -
        name: Build and push (for Push CI) in a daily basis
        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
        if: inputs.image_postfix != '-push-ci'
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-amd-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-amd-gpu-push-ci

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  latest-tensorflow:
    name: "Latest TensorFlow [dev]"
    # Push CI doesn't need this image
    if: inputs.image_postfix != '-push-ci'
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-tensorflow-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-tensorflow-gpu

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  latest-pytorch-deepspeed-amd:
    name: "PyTorch + DeepSpeed (AMD) [dev]"
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
      # Push CI images still need to be re-built daily
      -
        name: Build and push (for Push CI) in a daily basis
        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
        if: inputs.image_postfix != '-push-ci'
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  latest-quantization-torch-docker:
    name: "Latest Pytorch + Quantization [dev]"
    # Push CI doesn't need this image
    if: inputs.image_postfix != '-push-ci'
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-quantization-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
          title: 🤗 Results of the transformers-quantization-latest-gpu build
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
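Every job above delegates the actual image build to `docker/build-push-action`. As a rough local equivalent for the first job (context and tag taken from the workflow, the rest assumed, and without pushing to Docker Hub):

```bash
# Sketch: approximate the "Build and push" step of the latest-docker job locally.
docker build \
  --build-arg REF=main \
  -t huggingface/transformers-all-latest-gpu \
  ./docker/transformers-all-latest-gpu
```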
@@ -22,7 +22,6 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
- RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1
.github/workflows/model_jobs.yml (vendored, 1 changed line)

@@ -30,7 +30,6 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
- RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
.github/workflows/model_jobs_amd.yml (vendored, 1 changed line)

@@ -30,7 +30,6 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
- RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
.github/workflows/push-important-models.yml (vendored, 43 changed lines)

@@ -7,14 +7,13 @@ on:
env:
  OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
- RUN_PT_TF_CROSS_TESTS: 1

jobs:
  get_modified_models:
@@ -25,13 +24,13 @@ jobs:
    steps:
      - name: Check out code
        uses: actions/checkout@v4

      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42
        with:
          files: src/transformers/models/**

      - name: Run step if only the files listed above change
        if: steps.changed-files.outputs.any_changed == 'true'
        id: set-matrix
@@ -60,41 +59,41 @@ jobs:
    if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
    strategy:
      fail-fast: false
      matrix:
        model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}

    steps:
      - name: Check out code
        uses: actions/checkout@v4

      - name: Install locally transformers & other libs
        run: |
          apt install sudo
          sudo -H pip install --upgrade pip
          sudo -H pip uninstall -y transformers
          sudo -H pip install -U -e ".[testing]"
          MAX_JOBS=4 pip install flash-attn --no-build-isolation
          pip install bitsandbytes

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Show installed libraries and their versions
        run: pip freeze

      - name: Run FA2 tests
        id: run_fa2_tests
        run:
          pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*

      - name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.model-name }}_fa2_tests
          path: /transformers/reports/${{ matrix.model-name }}_fa2_tests

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
@@ -103,13 +102,13 @@ jobs:
          title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
          status: ${{ steps.run_fa2_tests.conclusion }}
          slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}

      - name: Run integration tests
        id: run_integration_tests
        if: always()
        run:
          pytest -rsfE -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*

      - name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
@@ -119,7 +118,7 @@ jobs:

      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
          title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
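The FA2 step above selects tests purely by marker and writes a report per model. A rough local mirror of that step for a single model directory (the model name and report name below are placeholders, not values from this diff) could be:

```bash
# Sketch: run the FlashAttention-2 marked tests for one model, as the workflow does per matrix entry.
MAX_JOBS=4 pip install flash-attn --no-build-isolation
pytest -rsfE -m "flash_attn_test" --make-reports=llama_fa2_tests tests/models/llama/test_modeling_*
```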
.github/workflows/self-comment-ci.yml (vendored, 1 changed line)

@@ -22,7 +22,6 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
- RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
.github/workflows/self-push-amd.yml (vendored, 1 changed line)

@@ -14,7 +14,6 @@ env:
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 60
  TF_FORCE_GPU_ALLOW_GROWTH: true
- RUN_PT_TF_CROSS_TESTS: 1
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
.github/workflows/self-push.yml (vendored, 9 changed lines)

@@ -24,7 +24,6 @@ env:
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 60
  TF_FORCE_GPU_ALLOW_GROWTH: true
- RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
@@ -293,7 +292,7 @@ jobs:
          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV

      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
@@ -406,7 +405,7 @@ jobs:
          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV

      - name: Update clone using environment variables
        working-directory: /workspace/transformers
        run: |
@@ -516,7 +515,7 @@ jobs:
          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV

      - name: Update clone using environment variables
        working-directory: /workspace/transformers
        run: |
@@ -648,6 +647,6 @@ jobs:
        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
        run: |
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
.github/workflows/self-scheduled.yml (vendored, 3 changed lines)

@@ -40,7 +40,6 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
- RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1
  NUM_SLICES: 2

@@ -571,4 +570,4 @@ jobs:
    with:
      docker: ${{ inputs.docker }}
      start_sha: ${{ github.sha }}
    secrets: inherit
.github/workflows/ssh-runner.yml (vendored, 19 changed lines)

@@ -5,7 +5,7 @@ on:
    inputs:
      runner_type:
        description: 'Type of runner to test (a10 or t4)'
        required: true
      docker_image:
        description: 'Name of the Docker image'
        required: true
@@ -15,15 +15,14 @@ on:

env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
  CUDA_VISIBLE_DEVICES: 0,1
- RUN_PT_TF_CROSS_TESTS: 1

jobs:
  get_runner:
@@ -78,7 +77,7 @@ jobs:
      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: NVIDIA-SMI
        run: |
          nvidia-smi
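The `runner_type` and `docker_image` inputs above suggest this workflow is meant to be dispatched manually. Assuming the GitHub CLI is available and the workflow accepts manual dispatch, a trigger could look like this (the image name is only an example):

```bash
# Sketch: start the SSH runner workflow by hand via the GitHub CLI (inputs are illustrative).
gh workflow run ssh-runner.yml \
  -f runner_type=a10 \
  -f docker_image=huggingface/transformers-all-latest-gpu
```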
@@ -344,7 +344,6 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
Like the slow tests, there are other environment variables available which are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration.
-- `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration.

More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).
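As a concrete illustration of how these variables are used (the test path below is only an example and is not part of this diff), a custom-tokenizer run would be enabled like this:

```bash
# Sketch: enable the custom tokenizer tests for a single run (example test file).
RUN_CUSTOM_TOKENIZERS=yes python -m pytest -sv tests/models/bert_japanese/test_tokenization_bert_japanese.py
```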
@@ -61,7 +61,6 @@ NOT_DEVICE_TESTS = {
    "test_load_save_without_tied_weights",
    "test_tied_weights_keys",
    "test_model_weights_reload_no_missing_tied_weights",
-   "test_pt_tf_model_equivalence",
    "test_mismatched_shapes_have_properly_initialized_weights",
    "test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist",
    "test_model_is_small",
@@ -85,9 +84,6 @@ warnings.simplefilter(action="ignore", category=FutureWarning)


def pytest_configure(config):
-   config.addinivalue_line(
-       "markers", "is_pt_tf_cross_test: mark test to run only when PT and TF interactions are tested"
-   )
    config.addinivalue_line(
        "markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested"
    )
@@ -8,9 +8,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && python3 -m pip instal
RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y tesseract-ocr

# Torch needs to be installed before deepspeed
# RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
RUN python3 -m pip uninstall -y deepspeed
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]

RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
RUN python3 -m pip install -U "itsdangerous<2.1.0"
@@ -2,10 +2,10 @@ FROM rocm/dev-ubuntu-22.04:6.2.4
LABEL maintainer="Hugging Face"

ARG DEBIAN_FRONTEND=noninteractive
-ARG PYTORCH='2.5.1'
-ARG TORCH_VISION='0.20.0'
-ARG TORCH_AUDIO='2.5.0'
-ARG ROCM='6.2'
+ARG PYTORCH='2.6.0'
+ARG TORCH_VISION='0.21.0'
+ARG TORCH_AUDIO='2.6.0'
+ARG ROCM='6.2.4'

RUN apt update && \
    apt install -y --no-install-recommends \
@@ -16,9 +16,11 @@ RUN apt update && \
    python-is-python3 \
    rocrand-dev \
    rocthrust-dev \
    rocblas-dev \
    hipsolver-dev \
    hipsparse-dev \
    hipblas-dev \
-   rocblas-dev && \
+   hipblaslt-dev && \
    apt clean && \
    rm -rf /var/lib/apt/lists/*
@@ -76,6 +76,9 @@ RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git
RUN python3 -m pip install --no-cache-dir flute-kernel==0.3.0 -i https://flute-ai.github.io/whl/cu118
RUN python3 -m pip install --no-cache-dir fast_hadamard_transform==1.0.4.post1

+# Add compressed-tensors for quantization testing
+RUN python3 -m pip install --no-cache-dir compressed-tensors

# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop
@@ -673,6 +673,29 @@ tpu_use_sudo: false
use_cpu: false
```

</hfoption>
<hfoption id="Tensor Parallelism with PyTorch 2">

```yml
compute_environment: LOCAL_MACHINE
tp_config:
  tp_size: 4
distributed_type: TP
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```

</hfoption>
</hfoptions>

The [`accelerate_launch`](https://huggingface.co/docs/accelerate/package_reference/cli#accelerate-launch) command is the recommended way to launch your training script on a distributed system with Accelerate and [`Trainer`], using the parameters specified in `config_file.yaml`. This file is saved to the Accelerate cache folder and is loaded automatically when you run `accelerate_launch`.
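A minimal launch command, assuming the configuration above was saved as `config_file.yaml` and the training script is called `run_glue.py` (both names are placeholders), could look like:

```bash
# Sketch: launch a training script with the Accelerate config defined above.
accelerate launch --config_file config_file.yaml run_glue.py --output_dir ./results
```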
@@ -284,7 +284,6 @@ Like the slow tests, there are other environment variables that are not enabled by default during testing:

* `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
* `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for the PyTorch + Flax integration.
-* `RUN_PT_TF_CROSS_TESTS`: Enables tests for the TensorFlow + PyTorch integration.

More environment variables and additional information can be found in [testing_utils.py](src/transformers/testing_utils.py).
@@ -55,7 +55,7 @@ To give some examples of how much VRAM it roughly takes to load a model in bfloa

As of writing this document, the largest GPU chip on the market is the A100 & H100 offering 80GB of VRAM. Most of the models listed before require more than 80GB just to be loaded and therefore necessarily require [tensor parallelism](https://huggingface.co/docs/transformers/perf_train_gpu_many#tensor-parallelism) and/or [pipeline parallelism](https://huggingface.co/docs/transformers/perf_train_gpu_many#naive-model-parallelism-vertical-and-pipeline-parallelism).

-🤗 Transformers does not support tensor parallelism out of the box as it requires the model architecture to be written in a specific way. If you're interested in writing models in a tensor-parallelism-friendly way, feel free to have a look at [the text-generation-inference library](https://github.com/huggingface/text-generation-inference/tree/main/server/text_generation_server/models/custom_modeling).
+🤗 Transformers now supports tensor parallelism for supported models having `base_tp_plan` in their respective config classes. Learn more about Tensor Parallelism [here](perf_train_gpu_many#tensor-parallelism). Furthermore, if you're interested in writing models in a tensor-parallelism-friendly way, feel free to have a look at [the text-generation-inference library](https://github.com/huggingface/text-generation-inference/tree/main/server/text_generation_server/models/custom_modeling).

Naive pipeline parallelism is supported out of the box. For this, simply load the model with `device="auto"` which will automatically place the different layers on the available GPUs as explained [here](https://huggingface.co/docs/accelerate/v0.22.0/en/concept_guides/big_model_inference).
Note, however that while very effective, this naive pipeline parallelism does not tackle the issues of GPU idling. For this more advanced pipeline parallelism is required as explained [here](https://huggingface.co/docs/transformers/en/perf_train_gpu_many#naive-model-parallelism-vertical-and-pipeline-parallelism).
@@ -450,12 +450,13 @@ Implementations:
- [parallelformers](https://github.com/tunib-ai/parallelformers) (only inference at the moment)
- [SageMaker](https://arxiv.org/abs/2111.05972) - this is a proprietary solution that can only be used on AWS.
- [OSLO](https://github.com/tunib-ai/oslo) has the tensor parallelism implementation based on the Transformers.
+- [`transformers` integration](main_classes/trainer): tensor parallelism is available through the tp_size attribute for models having `base_tp_plan`. Further you can look at [example usage](perf_infer_gpu_multi)

SageMaker combines TP with DP for a more efficient processing.

🤗 Transformers status:
-- core: not yet implemented in the core
-- but if you want inference [parallelformers](https://github.com/tunib-ai/parallelformers) provides this support for most of our models. So until this is implemented in the core you can use theirs. And hopefully training mode will be supported too.
+- core: uses PyTorch 2 APIs to support tensor parallelism for models having base_tp_plan in their respective config classes.
+- Alternatively, you can as well try [parallelformers](https://github.com/tunib-ai/parallelformers) that provides this support for most of our models. Training mode with TP is as well supported natively in transformers.
- Deepspeed-Inference also supports our BERT, GPT-2, and GPT-Neo models in their super-fast CUDA-kernel-based inference mode, see more [here](https://www.deepspeed.ai/tutorials/inference-tutorial/)

🤗 Accelerate integrates with [TP from Megatron-LM](https://huggingface.co/docs/accelerate/v0.23.0/en/usage_guides/megatron_lm).
@@ -535,7 +536,7 @@ Important papers:
- [Using DeepSpeed and Megatron to Train Megatron-Turing NLG 530B, A Large-Scale Generative Language Model](https://arxiv.org/abs/2201.11990)

-🤗 Transformers status: not yet implemented, since we have no PP and TP.
+🤗 Transformers status: not yet implemented, since we have no PP.

## FlexFlow
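The tensor-parallel path mentioned above builds on PyTorch distributed, so a script that loads a model with a tensor parallel plan has to be started with one process per GPU. A hedged sketch of the launch command (the script name and GPU count are placeholders):

```bash
# Sketch: start a tensor-parallel script with one process per GPU.
# tp_demo.py stands for any script that loads a model with a tensor parallel plan.
torchrun --nproc-per-node 4 tp_demo.py
```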
@@ -799,6 +799,29 @@ tpu_use_sudo: false
use_cpu: false
```

</hfoption>
<hfoption id="Tensor Parallelism with PyTorch 2">

```yml
compute_environment: LOCAL_MACHINE
tp_config:
  tp_size: 4
distributed_type: TP
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```

</hfoption>
</hfoptions>
@@ -361,6 +361,30 @@ use_cpu: false
```

</hfoption>

<hfoption id="Tensor Parallelism with PyTorch 2">

```yml
compute_environment: LOCAL_MACHINE
tp_config:
  tp_size: 4
distributed_type: TP
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```

</hfoption>
</hfoptions>
@@ -85,7 +85,7 @@ python src/transformers/commands/transformers_cli.py env
3. Provide a *code snippet* that shows how the feature is used.
4. If the feature is related to a paper, please include a link.

If the issue is well written, about 80% of the work is already done by the time it is created.

There are also [templates](https://github.com/huggingface/transformers/tree/main/templates) to help you file an issue.

@@ -140,7 +140,7 @@ python src/transformers/commands/transformers_cli.py env
```

If 🤗 Transformers is already installed in your virtual environment, remove it with `pip uninstall transformers` before reinstalling it with the `-e` flag.

Depending on your operating system, and as the number of optional dependencies of 🤗 Transformers grows, this command may fail. If so, install the deep learning framework you want to use (PyTorch, TensorFlow, and/or Flax) and then run:

```bash
@@ -188,7 +188,7 @@ python src/transformers/commands/transformers_cli.py env
To learn more about these checks and how to fix related issues, see the [Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide.

If you are modifying documents under the `docs/source` directory, make sure the documentation still builds. This check also runs in CI when you open a pull request. To run the check locally, install the doc builder:

```bash
pip install ".[docs]"
```
@@ -216,7 +216,7 @@ python src/transformers/commands/transformers_cli.py env
git fetch upstream
git rebase upstream/main
```

Push your changes to your branch:

```bash
@@ -238,7 +238,7 @@ python src/transformers/commands/transformers_cli.py env
☐ If you are adding a new feature, also add tests for it.<br>
- If you are adding a new model, use `ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)` to enable the common tests.
- If you are adding a new `@slow` test, make sure it passes with: `RUN_SLOW=1 python -m pytest tests/models/my_new_model/test_my_new_model.py`.
- If you are adding a new tokenizer, write tests and make sure they pass with: `RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py`.
- CircleCI does not run the slow tests, but GitHub Actions runs them nightly!<br>

☐ All public methods must have useful docstrings (see [`modeling_bert.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py) for an example).<br>
@@ -283,7 +283,6 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
Like the slow tests, there are other environment variables that are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for the PyTorch + Flax integration.
-- `RUN_PT_TF_CROSS_TESTS`: Enables tests for the TensorFlow + PyTorch integration.

More environment variables and additional information can be found in [testing_utils.py](src/transformers/testing_utils.py).
@@ -548,6 +548,29 @@ tpu_use_sudo: false
use_cpu: false
```

</hfoption>
<hfoption id="Tensor Parallelism with PyTorch 2">

```yml
compute_environment: LOCAL_MACHINE
tp_config:
  tp_size: 4
distributed_type: TP
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```

</hfoption>
</hfoptions>
@@ -33,7 +33,7 @@ limitations under the License.
* Implement new models.
* Contribute to the examples or the documentation.

If you don't know where to start, there is a special [Good First Issue](https://github.com/huggingface/transformers/contribute) list. It lists beginner-friendly open issues and helps you get started with contributing to open source. Just leave a comment on the issue you'd like to work on.

If you want something slightly more challenging, you can also look at the [Good Second Issue](https://github.com/huggingface/transformers/labels/Good%20Second%20Issue) list. In general, if you feel you know what to do, go for it and we will help you get there! 🚀

@@ -139,7 +139,7 @@ python src/transformers/commands/transformers_cli.py env
```

If 🤗 Transformers is already installed in your virtual environment, uninstall it first with `pip uninstall transformers`, then reinstall it in editable mode with the `-e` flag.

Depending on your operating system, and as the number of optional dependencies of Transformers grows, this command may fail. If that happens, make sure the deep learning framework you want to use (PyTorch, TensorFlow and Flax) is installed, then run:

```bash
@@ -187,7 +187,7 @@ python src/transformers/commands/transformers_cli.py env
To learn more about these checks and how to fix related issues, read the [Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide.

If you modified documents under the `docs/source` directory, make sure the documentation can still be built. This check also runs in CI when you open a PR. To run the check locally, make sure the doc builder is installed:

```bash
pip install ".[docs]"
```
@@ -282,7 +282,6 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
Like the slow tests, there are other environment variables that are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for the PyTorch + Flax integration.
-- `RUN_PT_TF_CROSS_TESTS`: Enables tests for the TensorFlow + PyTorch integration.

More environment variables and additional information can be found in [testing_utils.py](src/transformers/testing_utils.py).
@@ -61,7 +61,7 @@ from transformers.utils import check_min_version, send_example_telemetry
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
Array = Any
Dataset = datasets.arrow_dataset.Dataset

@@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/flax/speech-recognition/requirements.txt")

@@ -56,7 +56,7 @@ from transformers.utils import check_min_version, send_example_telemetry
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
Array = Any
Dataset = datasets.arrow_dataset.Dataset

@@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

@@ -45,7 +45,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")

@@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")

@@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

@@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)

@@ -43,7 +43,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

@@ -48,7 +48,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

@@ -53,7 +53,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

@@ -46,7 +46,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")

@@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")

@@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

@@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)

@@ -58,7 +58,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

@@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)

@@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

@@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

@@ -47,7 +47,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

@@ -46,7 +46,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = logging.getLogger(__name__)

@@ -54,7 +54,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)
# You should update this to your particular problem to have better documentation of `model_type`

@@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/object-detection/requirements.txt")

@@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logging.basicConfig(level=logging.INFO)
logger = get_logger(__name__)

@@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

@@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

@@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

@@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

@@ -46,7 +46,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

@@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")

@@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)

@@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

@@ -53,7 +53,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

@@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

@@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

@@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

@@ -47,7 +47,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

@@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

@@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)

@@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

@@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

@@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

@@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")

@@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.49.0.dev0")
+check_min_version("4.50.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
78
examples/quantization/custom_quantization.py
Normal file
@ -0,0 +1,78 @@
import json
from typing import Any, Dict

import torch

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.quantizers import HfQuantizer, register_quantization_config, register_quantizer
from transformers.utils.quantization_config import QuantizationConfigMixin


@register_quantization_config("custom")
class CustomConfig(QuantizationConfigMixin):
    def __init__(self):
        self.quant_method = "custom"
        self.bits = 8

    def to_dict(self) -> Dict[str, Any]:
        output = {
            "num_bits": self.bits,
        }
        return output

    def __repr__(self):
        config_dict = self.to_dict()
        return f"{self.__class__.__name__} {json.dumps(config_dict, indent=2, sort_keys=True)}\n"

    def to_diff_dict(self) -> Dict[str, Any]:
        config_dict = self.to_dict()

        default_config_dict = CustomConfig().to_dict()

        serializable_config_dict = {}

        for key, value in config_dict.items():
            if value != default_config_dict[key]:
                serializable_config_dict[key] = value

        return serializable_config_dict


@register_quantizer("custom")
class CustomQuantizer(HfQuantizer):
    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config
        self.scale_map = {}
        self.device = kwargs.get("device", "cuda" if torch.cuda.is_available() else "cpu")
        self.torch_dtype = kwargs.get("torch_dtype", torch.float32)

    def _process_model_before_weight_loading(self, model, **kwargs):
        return True

    def _process_model_after_weight_loading(self, model, **kwargs):
        return True

    def is_serializable(self) -> bool:
        return True

    def is_trainable(self) -> bool:
        return False


model_8bit = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m", quantization_config=CustomConfig(), torch_dtype="auto"
)

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
input_text = "once there is"
inputs = tokenizer(input_text, return_tensors="pt")
output = model_8bit.generate(
    **inputs,
    max_length=100,
    num_return_sequences=1,
    no_repeat_ngram_size=2,
)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print(generated_text)
@ -1,5 +1,5 @@
datasets==2.3.2
transformers==4.38.0
transformers==4.48.0
wandb==0.13.1
evaluate==0.2.2
scikit-learn==1.5.0
@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.49.0.dev0")
check_min_version("4.50.0.dev0")
require_version(
"datasets>=1.8.0", "To fix: pip install -r examples/tensorflow/contrastive-image-text/requirements.txt"
@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.49.0.dev0")
check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
@ -50,7 +50,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.49.0.dev0")
check_min_version("4.50.0.dev0")
logger = logging.getLogger(__name__)
@ -62,7 +62,7 @@ except (ModuleNotFoundError, ImportError):
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.49.0.dev0")
check_min_version("4.50.0.dev0")
logger = logging.getLogger(__name__)
@ -53,7 +53,7 @@ from transformers.utils.versions import require_version
# region Checking dependencies
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.49.0.dev0")
check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
@ -47,7 +47,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.49.0.dev0")
check_min_version("4.50.0.dev0")
task_to_keys = {
"cola": ("sentence", None),
@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
# region Dependencies and constants
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.49.0.dev0")
check_min_version("4.50.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
3
setup.py
@ -437,7 +437,7 @@ install_requires = [
setup(
name="transformers",
version="4.49.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
version="4.50.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
author_email="transformers@huggingface.co",
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
@ -473,7 +473,6 @@ setup(
extras["tests_torch"] = deps_list()
extras["tests_tf"] = deps_list()
extras["tests_flax"] = deps_list()
extras["tests_torch_and_tf"] = deps_list()
extras["tests_torch_and_flax"] = deps_list()
extras["tests_hub"] = deps_list()
extras["tests_pipelines_torch"] = deps_list()
@ -18,7 +18,7 @@
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
# in the namespace without actually importing anything (and especially none of the backends).
__version__ = "4.49.0.dev0"
__version__ = "4.50.0.dev0"
from typing import TYPE_CHECKING
@ -390,6 +390,7 @@ def spectrogram(
center: bool = True,
pad_mode: str = "reflect",
onesided: bool = True,
dither: float = 0.0,
preemphasis: Optional[float] = None,
mel_filters: Optional[np.ndarray] = None,
mel_floor: float = 1e-10,
@ -460,6 +461,12 @@ def spectrogram(
onesided (`bool`, *optional*, defaults to `True`):
If True, only computes the positive frequencies and returns a spectrogram containing `fft_length // 2 + 1`
frequency bins. If False, also computes the negative frequencies and returns `fft_length` frequency bins.
dither (`float`, *optional*, defaults to 0.0):
Adds dithering. In other words, adds a small Gaussian noise to each frame.
E.g. use 4.0 to add dithering with a normal distribution centered
around 0.0 with standard deviation 4.0, 0.0 means no dithering.
Dithering has similar effect as `mel_floor`. It reduces the high log_mel_fbank
values for signals with hard-zero sections, when VAD cutoff is present in the signal.
preemphasis (`float`, *optional*)
Coefficient for a low-pass filter that applies pre-emphasis before the DFT.
mel_filters (`np.ndarray` of shape `(num_freq_bins, num_mel_filters)`, *optional*):
@ -540,6 +547,9 @@ def spectrogram(
for frame_idx in range(num_frames):
buffer[:frame_length] = waveform[timestep : timestep + frame_length]
if dither != 0.0:
buffer[:frame_length] += dither * np.random.randn(frame_length)
if remove_dc_offset:
buffer[:frame_length] = buffer[:frame_length] - buffer[:frame_length].mean()
@ -591,6 +601,7 @@ def spectrogram_batch(
center: bool = True,
pad_mode: str = "reflect",
onesided: bool = True,
dither: float = 0.0,
preemphasis: Optional[float] = None,
mel_filters: Optional[np.ndarray] = None,
mel_floor: float = 1e-10,
@ -653,6 +664,10 @@ def spectrogram_batch(
The padding strategy when `center` is `True`.
onesided (`bool`, *optional*, defaults to `True`):
If True, returns a one-sided spectrogram for real input signals.
dither (`float`, *optional*, defaults to 0.0):
Adds dithering. In other words, adds a small Gaussian noise to each frame.
E.g. use 4.0 to add dithering with a normal distribution centered
around 0.0 with standard deviation 4.0, 0.0 means no dithering.
preemphasis (`float`, *optional*):
Applies a pre-emphasis filter to each frame.
mel_filters (`np.ndarray`, *optional*):
@ -741,6 +756,9 @@ def spectrogram_batch(
timestep = frame_idx * hop_length
buffer[:, :frame_length] = padded_waveform_batch[:, timestep : timestep + frame_length]
if dither != 0.0:
buffer[:, :frame_length] += dither * np.random.randn(*buffer[:, :frame_length].shape)
if remove_dc_offset:
buffer[:, :frame_length] -= buffer[:, :frame_length].mean(axis=1, keepdims=True)
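A minimal sketch of the new `dither` argument on `spectrogram()` added by the hunks above; the window settings and the silent waveform are illustrative assumptions, not taken from the diff:

import numpy as np
from transformers.audio_utils import spectrogram, window_function

waveform = np.zeros(16000, dtype=np.float32)  # hard-zero signal, the case where dithering matters
features = spectrogram(
    waveform,
    window=window_function(400, "hann"),
    frame_length=400,
    hop_length=160,
    power=2.0,
    dither=4.0,  # 0.0 (the default) keeps the previous behaviour
)
print(features.shape)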
@ -363,8 +363,7 @@ class DynamicCache(Cache):
```
"""
@deprecate_kwarg("num_hidden_layers", version="4.47.0")
def __init__(self, num_hidden_layers: Optional[int] = None) -> None:
def __init__(self) -> None:
super().__init__()
self._seen_tokens = 0  # Used in `generate` to keep tally of how many tokens the cache has seen
self.key_cache: List[torch.Tensor] = []
@ -466,10 +465,7 @@ class DynamicCache(Cache):
return legacy_cache
@classmethod
@deprecate_kwarg("num_hidden_layers", version="4.47.0")
def from_legacy_cache(
cls, past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, num_hidden_layers: int = None
) -> "DynamicCache":
def from_legacy_cache(cls, past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None) -> "DynamicCache":
"""Converts a cache in the legacy cache format into an equivalent `DynamicCache`. Used for
backward compatibility."""
cache = cls()
@ -495,10 +491,7 @@ class DynamicCache(Cache):
self.key_cache[idx] = self.key_cache[idx][..., :max_length, :]
self.value_cache[idx] = self.value_cache[idx][..., :max_length, :]
@deprecate_kwarg("num_hidden_layers", version="4.47.0")
def batch_split(
self, full_batch_size: int, split_size: int, num_hidden_layers: int = None
) -> List["DynamicCache"]:
def batch_split(self, full_batch_size: int, split_size: int) -> List["DynamicCache"]:
"""Split the current instance into a list of `DynamicCache` by the batch size. This will be used by
`_split_model_inputs()` in `generation.utils`"""
out = []
@ -511,8 +504,7 @@ class DynamicCache(Cache):
return out
@classmethod
@deprecate_kwarg("num_hidden_layers", version="4.47.0")
def from_batch_splits(cls, splits: List["DynamicCache"], num_hidden_layers: int = None) -> "DynamicCache":
def from_batch_splits(cls, splits: List["DynamicCache"]) -> "DynamicCache":
"""This is the opposite of the above `batch_split()` method. This will be used by `stack_model_outputs` in
`generation.utils`"""
cache = cls()
@ -1527,10 +1519,7 @@ class EncoderDecoderCache(Cache):
self.check_dynamic_cache(self.crop.__name__)
self.self_attention_cache.crop(maximum_length)
@deprecate_kwarg("num_hidden_layers", version="4.47.0")
def batch_split(
self, full_batch_size: int, split_size: int, num_hidden_layers: int = None
) -> "List[EncoderDecoderCache]":
def batch_split(self, full_batch_size: int, split_size: int) -> "List[EncoderDecoderCache]":
"""Split the current instance into a list of `DynamicCache` by the batch size. This will be used by
`_split_model_inputs()` in `generation.utils`"""
self.check_dynamic_cache(self.batch_split.__name__)
@ -1543,10 +1532,7 @@ class EncoderDecoderCache(Cache):
return out
@classmethod
@deprecate_kwarg("num_hidden_layers", version="4.47.0")
def from_batch_splits(
cls, splits: List["EncoderDecoderCache"], num_hidden_layers: int = None
) -> "EncoderDecoderCache":
def from_batch_splits(cls, splits: List["EncoderDecoderCache"]) -> "EncoderDecoderCache":
"""This is the opposite of the above `batch_split()` method. This will be used by `stack_model_outputs` in
`generation.utils`"""
self_attention_cache = DynamicCache()
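A small sketch of the `DynamicCache` API once the deprecated `num_hidden_layers` argument is dropped, as in the hunks above; the dummy tensors are illustrative assumptions:

import torch
from transformers import DynamicCache

cache = DynamicCache()  # no num_hidden_layers argument anymore
legacy = ((torch.zeros(1, 2, 3, 4), torch.zeros(1, 2, 3, 4)),)  # (key, value) for a single layer
cache = DynamicCache.from_legacy_cache(legacy)
splits = cache.batch_split(full_batch_size=1, split_size=1)  # also no num_hidden_layers here
print(len(splits), cache.get_seq_length())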
@ -420,6 +420,7 @@ class GenerationMixin:
model_inputs[input_ids_key] = input_ids.clone(memory_format=torch.contiguous_format)
# 4. Create missing `position_ids` on the fly
encoder_attention_mask = attention_mask if self.config.is_encoder_decoder else None
attention_mask = (
kwargs.pop("decoder_attention_mask", None) if self.config.is_encoder_decoder else attention_mask
)
@ -490,6 +491,9 @@ class GenerationMixin:
if attention_mask is not None:
model_inputs[attention_mask_key] = attention_mask
if encoder_attention_mask is not None:
model_inputs["attention_mask"] = encoder_attention_mask
# 7. Forward ALL kwargs that are uninitialized (e.g. `use_cache`).
for key, value in kwargs.items():
if key not in model_inputs:
@ -4520,7 +4524,7 @@ def _ranking_fast(
return selected_idx
def _split(data, full_batch_size: int, num_hidden_layers: int, split_size: int = None):
def _split(data, full_batch_size: int, split_size: int = None):
"""
Takes care of three cases:
1. data is a tensor: e.g. last_hidden_state, pooler_output etc. split them on the batch_size dim
@ -4538,7 +4542,7 @@ def _split(data, full_batch_size: int, num_hidden_layers: int, split_size: int =
elif isinstance(data, DynamicCache) or (
isinstance(data, EncoderDecoderCache) and isinstance(data.self_attention_cache, DynamicCache)
):
return data.batch_split(full_batch_size, split_size, num_hidden_layers)
return data.batch_split(full_batch_size, split_size)
elif isinstance(data, tuple):
# If the elements of the tuple are also tuples (e.g., past_key_values in our earlier example)
if isinstance(data[0], tuple):
@ -4591,11 +4595,9 @@ def _split_model_inputs(
keys_to_ignore = ["cache_position", "encoder_outputs", "logits_to_keep"]
non_bool_keys = [k for k in keys if not isinstance(model_input[k], bool) and k not in keys_to_ignore]
num_hidden_layers = config.get_text_config().num_hidden_layers
# we split the tensors and tuples of tensors
data_split_list = [
{k: _split(model_input[k], full_batch_size, num_hidden_layers, split_size)[i] for k in non_bool_keys}
{k: _split(model_input[k], full_batch_size, split_size)[i] for k in non_bool_keys}
for i in range(full_batch_size // split_size)
]
# bool values are the same and replicated for each split
@ -4632,7 +4634,6 @@ def stack_model_outputs(model_outputs: List[ModelOutput], config: PretrainedConf
# Infer the class from the first object in the list
model_output_cls = type(model_outputs[0])
num_hidden_layers = config.get_text_config().num_hidden_layers
# Ensure all objects are of the same type
if not all(isinstance(obj, model_output_cls) for obj in model_outputs):
@ -4649,9 +4650,9 @@ def stack_model_outputs(model_outputs: List[ModelOutput], config: PretrainedConf
return torch.cat(data, dim=0)
# New cache format
elif isinstance(data[0], DynamicCache):
return DynamicCache.from_batch_splits(data, num_hidden_layers=num_hidden_layers)
return DynamicCache.from_batch_splits(data)
elif isinstance(data[0], EncoderDecoderCache):
return EncoderDecoderCache.from_batch_splits(data, num_hidden_layers=num_hidden_layers)
return EncoderDecoderCache.from_batch_splits(data)
elif isinstance(data[0], tuple):
# If the elements of the tuple are also tuples (e.g., past_key_values in our earlier example)
if isinstance(data[0][0], tuple):
@ -787,6 +787,7 @@ def _load_state_dict_into_meta_model(
keep_in_fp32_modules=None,
unexpected_keys=None,  # passing `unexpected` for cleanup from quantization items
pretrained_model_name_or_path=None,  # for flagging the user when the model contains renamed keys
device_mesh=None,
):
"""
This is somewhat similar to `_load_state_dict_into_model`, but deals with a model that has some or all of its
@ -796,6 +797,8 @@ def _load_state_dict_into_meta_model(
`start_prefix` is used for models which insert their name into model keys, e.g. `bert` in
`bert.pooler.dense.weight`
It also initialize tensor parallelism for each module if needed.
"""
# XXX: remaining features to implement to be fully compatible with _load_state_dict_into_model
@ -809,6 +812,12 @@ def _load_state_dict_into_meta_model(
is_torch_e4m3fn_available = hasattr(torch, "float8_e4m3fn")
# we need this later to initialize tensor parallelism
if device_mesh is not None:
full_tp_plan = model.config.base_model_tp_plan
for submodule in model.modules():
full_tp_plan.update(getattr(submodule, "_tp_plan", {}))
for param_name, param in state_dict.items():
if param_name not in expected_keys:
continue
@ -912,6 +921,37 @@ def _load_state_dict_into_meta_model(
setattr(module, tensor_name, value)
# TODO: consider removing used param_parts from state_dict before return
# In this case, let's parallelize the modules!
if device_mesh is not None:
# Immediate parent
split_parent_module_name = param_name.split(".")[:-1]
parent_module_name = ".".join(split_parent_module_name)
parent_module = model
for name in split_parent_module_name:
parent_module = getattr(parent_module, name)
# Check if we are part of the tp_plan
current_module_plan = None
for param, plan in full_tp_plan.items():
# "*" are a placeholder for layer indices, so we replace them by "[0-9]+" in the regex pattern
pattern = param.replace("*", "[0-9]+")
if re.search(pattern, parent_module_name):
current_module_plan = plan
break
# We can only apply the tp_plan after all parameters of the current module have been correctly initialized (e.g.
# if we have bias, we need both `weights` and `bias` of a nn.Linear to be initialized)
process_device = list(device_map.values())[0]
all_module_parameters_initialized = all(
m.device == process_device for m in parent_module.parameters(recurse=False)
) and all(m.device == process_device for m in parent_module.buffers(recurse=False))
if current_module_plan is not None and all_module_parameters_initialized:
torch.distributed.tensor.parallel.parallelize_module(
parent_module,
device_mesh=device_mesh,
parallelize_plan=translate_to_torch_parallel_style(current_module_plan),
)
return error_msgs, offload_index, state_dict_index
@ -3489,12 +3529,11 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
)
# We need to correctly dispatch the model on the current process device. The easiest way for this is to use a simple
# `device_map` pointing to the correct device. If we don't, torch will use the default device (index 0) for all
# childs processes at parallelization time, resulting in excessive memory usage on device 0 and OOMs.
# And temporarily setting the default device to current process rank result in the following error
# `torch.distributed.DistBackendError: Attempt to perform collective on tensor not on device passed to init_process_group`
tp_device = None
# `device_map` pointing to the correct device
device_mesh = None
if tp_plan is not None:
if not is_torch_greater_or_equal("2.5"):
raise EnvironmentError("tensor parallel is only supported for `torch>=2.5`.")
if not torch.distributed.is_initialized():
raise ValueError("Tensor Parallel requires torch.distributed to be initialized first.")
@ -3506,6 +3545,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
# This is the easiest way to dispatch to the current process device
device_map = tp_device
# Assuming sharding the model onto the world
world_size = torch.distributed.get_world_size()
device_mesh = torch.distributed.init_device_mesh(tp_device.type, (world_size,))
if is_fsdp_enabled():
low_cpu_mem_usage = True
@ -3600,7 +3643,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
if low_cpu_mem_usage is None:
low_cpu_mem_usage = True
elif not low_cpu_mem_usage:
raise ValueError("Passing along a `device_map` requires `low_cpu_mem_usage=True`")
raise ValueError("Passing along a `device_map` or a `tp_plan` requires `low_cpu_mem_usage=True`")
if low_cpu_mem_usage:
if is_deepspeed_zero3_enabled():
@ -3609,7 +3652,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
)
elif not is_accelerate_available():
raise ImportError(
f"Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`"
f"Using `low_cpu_mem_usage=True`, a `device_map` or a `tp_plan` requires Accelerate: `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`"
)
# handling bnb config from kwargs, remove after `load_in_{4/8}bit` deprecation.
@ -3706,8 +3749,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
device_map = hf_quantizer.update_device_map(device_map)
# In order to ensure popular quantization methods are supported. Can be disable with `disable_telemetry`
user_agent["quant"] = hf_quantizer.quantization_config.quant_method.value
if hasattr(hf_quantizer.quantization_config.quant_method, "value"):
user_agent["quant"] = hf_quantizer.quantization_config.quant_method.value
else:
user_agent["quant"] = hf_quantizer.quantization_config.quant_method
# Force-set to `True` for more mem efficiency
if low_cpu_mem_usage is None:
low_cpu_mem_usage = True
@ -4184,6 +4229,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
# Let's make sure we don't run the init function of buffer modules
model = cls(config, *model_args, **model_kwargs)
if device_mesh is not None and not model.supports_tp_plan:
raise NotImplementedError("This model does not have a tensor parallel plan.")
# make sure we use the model's config since the __init__ call might have copied it
config = model.config
@ -4334,6 +4382,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
keep_in_fp32_modules=keep_in_fp32_modules,
gguf_path=gguf_path,
weights_only=weights_only,
device_mesh=device_mesh,
)
# make sure token embedding weights are still tied if needed
@ -4368,8 +4417,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
)
pass
# Dispatch model with hooks on all devices if necessary
if device_map is not None:
# Dispatch model with hooks on all devices if necessary (not needed with a tp_plan, so we skip it as it slightly
# harm performances)
if device_map is not None and device_mesh is None:
device_map_kwargs = {
"device_map": device_map,
"offload_dir": offload_folder,
@ -4396,6 +4446,13 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
if not is_fsdp_enabled() and not is_deepspeed_zero3_enabled():
dispatch_model(model, **device_map_kwargs)
# This is needed for the RotaryEmbedding, which was not initialized on the correct device as it is
# not part of the state_dict (persistent=False)
if device_mesh is not None:
for buffer in model.buffers():
if buffer.device != tp_device:
buffer.data = buffer.to(tp_device)
if hf_quantizer is not None:
hf_quantizer.postprocess_model(model, config=config)
model.hf_quantizer = hf_quantizer
@ -4418,16 +4475,6 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
}
return model, loading_info
if tp_plan is not None:
assert tp_device is not None, "tp_device not set!"
if not model.supports_tp_plan:
raise NotImplementedError("This model does not have a tensor parallel plan.")
# Assuming sharding the model onto the world
world_size = torch.distributed.get_world_size()
device_mesh = torch.distributed.init_device_mesh(tp_device.type, (world_size,))
# Apply Tensor Parallelism
model.tensor_parallel(device_mesh)
return model
@staticmethod
@ -4521,6 +4568,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
keep_in_fp32_modules=None,
gguf_path=None,
weights_only=True,
device_mesh=None,
):
is_safetensors = False
is_quantized = hf_quantizer is not None
@ -4820,6 +4868,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
is_safetensors=is_safetensors,
keep_in_fp32_modules=keep_in_fp32_modules,
unexpected_keys=unexpected_keys,
device_mesh=device_mesh,
)
else:
# Sharded checkpoint or whole but low_cpu_mem_usage==True
@ -4909,6 +4958,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
is_safetensors=is_safetensors,
keep_in_fp32_modules=keep_in_fp32_modules,
unexpected_keys=unexpected_keys,
device_mesh=device_mesh,
)
error_msgs += new_error_msgs
else:
@ -5186,7 +5236,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
def tensor_parallel(self, device_mesh):
"""
Tensor parallelize the model across the given device mesh.
Tensor parallelize the model across the given device mesh. This function is a helper to be called after the model
was already loaded in memory, note however that this means that each process will first initialize the whole model,
then parallelize it accross devices. Thus there is a huge waste of GPU memory, and this can lead to OOM at loading time.
Calling `from_pretrained(..., tp_plan="auto")` is prefered, and will parallelize module-by-module during initialization,
so that the expected per-device memory spike at loading time is not larger than the final model size on each device.
Args:
device_mesh (`torch.distributed.DeviceMesh`):
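A minimal usage sketch of the `tp_plan="auto"` loading path recommended in the docstring above; the checkpoint name and the two-process launch are illustrative assumptions, not taken from the diff:

# torchrun --nproc-per-node=2 tp_example.py
import torch
from transformers import AutoModelForCausalLM

torch.distributed.init_process_group("nccl")  # tp_plan requires torch.distributed to be initialized first
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B", tp_plan="auto")
# Each rank only materializes its shard, so the loading-time memory peak stays near the sharded size.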
@ -682,7 +682,7 @@ class FuyuProcessor(ProcessorMixin):
return results
def post_process_image_text_to_text(self, generated_outputs):
def post_process_image_text_to_text(self, generated_outputs, skip_special_tokens=True, **kwargs):
"""
Post-processes the output of `FuyuForConditionalGeneration` to only return the text output.
@ -690,6 +690,10 @@ class FuyuProcessor(ProcessorMixin):
generated_outputs (`torch.Tensor` or `np.ndarray`):
The output of the model. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
containing the token ids of the generated sequences.
skip_special_tokens (`bool`, *optional*, defaults to `True`):
Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
**kwargs:
Additional arguments to be passed to the tokenizer's `batch_decode method`.
Returns:
`List[str]`: The decoded text output.
@ -706,7 +710,7 @@ class FuyuProcessor(ProcessorMixin):
for i, seq in enumerate(unpadded_output_sequences):
padded_output_sequences[i, : len(seq)] = torch.tensor(seq)
return self.batch_decode(padded_output_sequences, skip_special_tokens=True)
return self.batch_decode(padded_output_sequences, skip_special_tokens=skip_special_tokens, **kwargs)
def batch_decode(self, *args, **kwargs):
"""
@ -428,7 +428,7 @@ class Kosmos2Processor(ProcessorMixin):
return clean_text_and_extract_entities_with_bboxes(caption)
return caption
def post_process_image_text_to_text(self, generated_outputs):
def post_process_image_text_to_text(self, generated_outputs, skip_special_tokens=True, **kwargs):
"""
Post-process the output of the model to decode the text.
@ -436,11 +436,15 @@ class Kosmos2Processor(ProcessorMixin):
generated_outputs (`torch.Tensor` or `np.ndarray`):
The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
or `(sequence_length,)`.
skip_special_tokens (`bool`, *optional*, defaults to `True`):
Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
**kwargs:
Additional arguments to be passed to the tokenizer's `batch_decode method`.
Returns:
`List[str]`: The decoded text.
"""
generated_texts = self.batch_decode(generated_outputs, skip_special_tokens=True)
generated_texts = self.batch_decode(generated_outputs, skip_special_tokens=skip_special_tokens, **kwargs)
return [self.post_process_generation(text, cleanup_and_extract=False) for text in generated_texts]
@property
@ -16,24 +16,33 @@
Processor class for LLaVa-NeXT-Video.
"""
from typing import TYPE_CHECKING, List, Optional, Union
from typing import List, Union
import numpy as np
from ...feature_extraction_utils import BatchFeature
from ...image_processing_utils import select_best_resolution
from ...image_utils import ImageInput, VideoInput, get_image_size, to_numpy_array
from ...processing_utils import ProcessorMixin
from ...tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
from ...utils import TensorType, logging
from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack, _validate_images_text_input_order
from ...tokenization_utils_base import PreTokenizedInput, TextInput
from ...utils import logging
if TYPE_CHECKING:
pass
logger = logging.get_logger(__name__)
class LlavaNextVideoProcessorKwargs(ProcessingKwargs, total=False):
# see processing_utils.ProcessingKwargs documentation for usage.
_defaults = {
"text_kwargs": {
"padding": False,
},
"common_kwargs": {
"return_tensors": "pt",
},
}
class LlavaNextVideoProcessor(ProcessorMixin):
r"""
Constructs a LLaVa-NeXT-Video processor which wraps a LLaVa-NeXT image processor, LLaVa-NeXT-Video video processor and
@ -102,13 +111,11 @@ class LlavaNextVideoProcessor(ProcessorMixin):
def __call__(
self,
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]],
images: ImageInput = None,
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
audio=None,
videos: VideoInput = None,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
max_length: int = None,
return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
**kwargs: Unpack[LlavaNextVideoProcessorKwargs],
) -> BatchFeature:
"""
Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
@ -130,19 +137,6 @@ class LlavaNextVideoProcessor(ProcessorMixin):
videos (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`):
The image or batch of videos to be prepared. Each video can be a 4D NumPy array or PyTorch
tensor, or a nested list of 3D frames. Both channels-first and channels-last formats are supported.
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding
index) among:
- `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
sequence if provided).
- `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
acceptable input length for the model if that argument is not provided.
- `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
lengths).
max_length (`int`, *optional*):
Maximum length of the returned list and optionally padding length (see above).
truncation (`bool`, *optional*):
Activates truncation to cut input sequences longer than `max_length` to `max_length`.
return_tensors (`str` or [`~utils.TensorType`], *optional*):
If set, will return tensors of a particular framework. Acceptable values are:
@ -160,13 +154,21 @@ class LlavaNextVideoProcessor(ProcessorMixin):
`None`).
- **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
"""
# check if images and text inputs are reversed for BC
images, text = _validate_images_text_input_order(images, text)
output_kwargs = self._merge_kwargs(
LlavaNextVideoProcessorKwargs,
tokenizer_init_kwargs=self.tokenizer.init_kwargs,
**kwargs,
)
if images is not None:
image_inputs = self.image_processor(images, return_tensors=return_tensors)
image_inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
else:
image_inputs = {}
if videos is not None:
videos_inputs = self.video_processor(videos, return_tensors=return_tensors)
videos_inputs = self.video_processor(videos, **output_kwargs["videos_kwargs"])
else:
videos_inputs = {}
@ -212,13 +214,7 @@ class LlavaNextVideoProcessor(ProcessorMixin):
prompt_strings.append(sample)
text = prompt_strings
text_inputs = self.tokenizer(
text,
return_tensors=return_tensors,
padding=padding,
truncation=truncation,
max_length=max_length,
)
text_inputs = self.tokenizer(text, **output_kwargs["text_kwargs"])
return BatchFeature(data={**text_inputs, **image_inputs, **videos_inputs})
# Copied from transformers.models.llava_next.processing_llava_next.LlavaNextProcessor._get_number_of_features
@ -41,7 +41,7 @@ class LlavaOnevisionProcessorKwargs(ProcessingKwargs, total=False):
"padding": False,
},
"image_kwargs": {},
"video_kwargs": {},
"videos_kwargs": {},
}
@ -346,7 +346,9 @@ class MllamaProcessor(ProcessorMixin):
"""
return self.tokenizer.decode(*args, **kwargs)
def post_process_image_text_to_text(self, generated_outputs):
def post_process_image_text_to_text(
self, generated_outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False, **kwargs
):
"""
Post-process the output of the model to decode the text.
@ -354,12 +356,21 @@ class MllamaProcessor(ProcessorMixin):
generated_outputs (`torch.Tensor` or `np.ndarray`):
The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
or `(sequence_length,)`.
skip_special_tokens (`bool`, *optional*, defaults to `True`):
Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
Clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
Whether or not to clean up the tokenization spaces. Argument passed to the tokenizer's `batch_decode` method.
**kwargs:
Additional arguments to be passed to the tokenizer's `batch_decode method`.
Returns:
`List[str]`: The decoded text.
"""
return self.tokenizer.batch_decode(
generated_outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False
generated_outputs,
skip_special_tokens=skip_special_tokens,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
**kwargs,
)
@property
@ -165,8 +165,8 @@ def apply_rotary_pos_emb_flashatt(
) -> Tuple[torch.Tensor, torch.Tensor]:
cos = cos.chunk(2, dim=-1)[0].contiguous()
sin = sin.chunk(2, dim=-1)[0].contiguous()
q_embed = apply_rotary_emb(q.float(), cos, sin).type_as(q)
k_embed = apply_rotary_emb(k.float(), cos, sin).type_as(k)
q_embed = apply_rotary_emb(q.float(), cos.float(), sin.float()).type_as(q)
k_embed = apply_rotary_emb(k.float(), cos.float(), sin.float()).type_as(k)
return q_embed, k_embed
@ -194,8 +194,8 @@ class Qwen2_5_VLVisionFlashAttention2(nn.Module):
"removed and `position_embeddings` will be mandatory."
)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
cos = emb.cos().float()
sin = emb.sin().float()
cos = emb.cos()
sin = emb.sin()
else:
cos, sin = position_embeddings
q, k = apply_rotary_pos_emb_flashatt(q.unsqueeze(0), k.unsqueeze(0), cos, sin)
@ -223,7 +223,7 @@ def apply_rotary_pos_emb_vision(
orig_q_dtype = q.dtype
orig_k_dtype = k.dtype
q, k = q.float(), k.float()
cos, sin = cos.unsqueeze(-2), sin.unsqueeze(-2)
cos, sin = cos.unsqueeze(-2).float(), sin.unsqueeze(-2).float()
q_embed = (q * cos) + (rotate_half(q) * sin)
k_embed = (k * cos) + (rotate_half(k) * sin)
q_embed = q_embed.to(orig_q_dtype)
@ -256,8 +256,8 @@ class Qwen2_5_VLVisionAttention(nn.Module):
"removed and `position_embeddings` will be mandatory."
)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
cos = emb.cos().float()
sin = emb.sin().float()
cos = emb.cos()
sin = emb.sin()
else:
cos, sin = position_embeddings
q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
@ -305,8 +305,8 @@ class Qwen2_5_VLVisionSdpaAttention(nn.Module):
"removed and `position_embeddings` will be mandatory."
)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
cos = emb.cos().float()
sin = emb.sin().float()
cos = emb.cos()
sin = emb.sin()
else:
cos, sin = position_embeddings
q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
@ -70,8 +70,8 @@ def apply_rotary_pos_emb_flashatt(
) -> Tuple[torch.Tensor, torch.Tensor]:
cos = cos.chunk(2, dim=-1)[0].contiguous()
sin = sin.chunk(2, dim=-1)[0].contiguous()
q_embed = apply_rotary_emb(q.float(), cos, sin).type_as(q)
k_embed = apply_rotary_emb(k.float(), cos, sin).type_as(k)
q_embed = apply_rotary_emb(q.float(), cos.float(), sin.float()).type_as(q)
k_embed = apply_rotary_emb(k.float(), cos.float(), sin.float()).type_as(k)
return q_embed, k_embed
@ -170,8 +170,8 @@ class Qwen2_5_VLVisionFlashAttention2(nn.Module):
"removed and `position_embeddings` will be mandatory."
)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
cos = emb.cos().float()
sin = emb.sin().float()
cos = emb.cos()
sin = emb.sin()
else:
cos, sin = position_embeddings
q, k = apply_rotary_pos_emb_flashatt(q.unsqueeze(0), k.unsqueeze(0), cos, sin)
@ -192,7 +192,9 @@ class Qwen2_5_VLProcessor(ProcessorMixin):
"""
return self.tokenizer.decode(*args, **kwargs)
def post_process_image_text_to_text(self, generated_outputs):
def post_process_image_text_to_text(
self, generated_outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False, **kwargs
):
"""
Post-process the output of the model to decode the text.
@ -200,12 +202,21 @@ class Qwen2_5_VLProcessor(ProcessorMixin):
generated_outputs (`torch.Tensor` or `np.ndarray`):
The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
or `(sequence_length,)`.
skip_special_tokens (`bool`, *optional*, defaults to `True`):
Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
Clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
Whether or not to clean up the tokenization spaces. Argument passed to the tokenizer's `batch_decode` method.
**kwargs:
Additional arguments to be passed to the tokenizer's `batch_decode method`.
Returns:
`List[str]`: The decoded text.
"""
return self.tokenizer.batch_decode(
generated_outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False
generated_outputs,
skip_special_tokens=skip_special_tokens,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
**kwargs,
)
@property
@ -220,7 +220,7 @@ def apply_rotary_pos_emb_vision(
orig_q_dtype = q.dtype
orig_k_dtype = k.dtype
q, k = q.float(), k.float()
cos, sin = cos.unsqueeze(-2), sin.unsqueeze(-2)
cos, sin = cos.unsqueeze(-2).float(), sin.unsqueeze(-2).float()
q_embed = (q * cos) + (rotate_half(q) * sin)
k_embed = (k * cos) + (rotate_half(k) * sin)
q_embed = q_embed.to(orig_q_dtype)
@ -318,8 +318,8 @@ class VisionAttention(nn.Module):
"removed and `position_embeddings` will be mandatory."
)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
cos = emb.cos().float()
sin = emb.sin().float()
cos = emb.cos()
sin = emb.sin()
else:
cos, sin = position_embeddings
q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
@ -367,8 +367,8 @@ class VisionFlashAttention2(nn.Module):
"removed and `position_embeddings` will be mandatory."
)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
cos = emb.cos().float()
sin = emb.sin().float()
cos = emb.cos()
sin = emb.sin()
else:
cos, sin = position_embeddings
q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
@ -405,8 +405,8 @@ class VisionSdpaAttention(nn.Module):
"removed and `position_embeddings` will be mandatory."
)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
cos = emb.cos().float()
sin = emb.sin().float()
cos = emb.cos()
sin = emb.sin()
else:
cos, sin = position_embeddings
q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
@ -170,7 +170,9 @@ class Qwen2VLProcessor(ProcessorMixin):
"""
return self.tokenizer.decode(*args, **kwargs)
def post_process_image_text_to_text(self, generated_outputs):
def post_process_image_text_to_text(
self, generated_outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False, **kwargs
):
"""
Post-process the output of the model to decode the text.
@ -178,12 +180,21 @@ class Qwen2VLProcessor(ProcessorMixin):
generated_outputs (`torch.Tensor` or `np.ndarray`):
The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
or `(sequence_length,)`.
skip_special_tokens (`bool`, *optional*, defaults to `True`):
Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
Clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
Whether or not to clean up the tokenization spaces. Argument passed to the tokenizer's `batch_decode` method.
**kwargs:
Additional arguments to be passed to the tokenizer's `batch_decode method`.
Returns:
`List[str]`: The decoded text.
"""
return self.tokenizer.batch_decode(
generated_outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False
generated_outputs,
skip_special_tokens=skip_special_tokens,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
**kwargs,
)
@property
@ -52,6 +52,13 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
Number of Mel-frequency bins.
padding_value (`float`, *optional*, defaults to 0.0):
The value that is used to fill the padding vectors.
dither (`float`, *optional*, defaults to 0.0):
Adds dithering. In other words, adds a small Gaussian noise to each frame.
E.g. use 4.0 to add dithering with a normal distribution centered
around 0.0 with standard deviation 4.0 (assuming [-32k,+32k] range of kaldi waveform).
The value 0.0 means no dithering.
Dithering has similar effect as `mel_floor`. It reduces the high log_mel_fbank
values for signals with hard-zero sections, when VAD cutoff is present in the signal.
do_ceptral_normalize (`bool`, *optional*, defaults to `True`):
Whether or not to apply utterance-level cepstral mean and variance normalization to extracted features.
normalize_means (`bool`, *optional*, defaults to `True`):
@ -68,6 +75,7 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
sampling_rate=16000,
num_mel_bins=80,
padding_value=0.0,
dither=0.0,
do_ceptral_normalize=True,
normalize_means=True,
normalize_vars=True,
@ -75,6 +83,7 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
):
super().__init__(feature_size=feature_size, sampling_rate=sampling_rate, padding_value=padding_value, **kwargs)
self.num_mel_bins = num_mel_bins
self.dither = dither
self.do_ceptral_normalize = do_ceptral_normalize
self.normalize_means = normalize_means
self.normalize_vars = normalize_vars
@ -106,7 +115,12 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
waveform = waveform * (2**15)  # Kaldi compliance: 16-bit signed integers
if is_speech_available():
waveform = torch.from_numpy(waveform).unsqueeze(0)
features = ta_kaldi.fbank(waveform, num_mel_bins=self.num_mel_bins, sample_frequency=self.sampling_rate)
features = ta_kaldi.fbank(
waveform,
dither=self.dither,
num_mel_bins=self.num_mel_bins,
sample_frequency=self.sampling_rate,
)
features = features.numpy()
else:
waveform = np.squeeze(waveform)
@ -118,6 +132,7 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
fft_length=512,
power=2.0,
center=False,
dither=self.dither,
preemphasis=0.97,
mel_filters=self.mel_filters,
log_mel="log",
@ -57,6 +57,14 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor):
Size of the Fourier transform.
padding_value (`float`, *optional*, defaults to 0.0):
Padding value used to pad the audio. Should correspond to silences.
dither (`float`, *optional*, defaults to 0.0):
Adds dithering. In other words, adds a small Gaussian noise to each frame.
E.g. use 0.0001 to add dithering with a normal distribution centered
around 0.0 with standard deviation 0.0001 (assuming [-1,+1] range of raw_speech).
The value 0.0 means no dithering.
Dithering has similar effect as `spectrogram(mel_floor=...)`. It reduces
the high log_mel_fbank values for signals with hard-zero sections,
when VAD cutoff is present in the signal.
"""
model_input_names = ["input_features"]
@ -69,6 +77,7 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor):
chunk_length=30,
n_fft=400,
padding_value=0.0,
dither=0.0,
return_attention_mask=False,  # pad inputs to max length with silence token (zero) and no attention mask
**kwargs,
):
@ -85,6 +94,7 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor):
self.n_samples = chunk_length * sampling_rate
self.nb_max_frames = self.n_samples // hop_length
self.sampling_rate = sampling_rate
self.dither = dither
self.mel_filters = mel_filter_bank(
num_frequency_bins=1 + n_fft // 2,
num_mel_filters=feature_size,
@ -114,6 +124,7 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor):
frame_length=self.n_fft,
hop_length=self.hop_length,
power=2.0,
dither=self.dither,
mel_filters=self.mel_filters,
log_mel="log10",
)
@ -132,6 +143,12 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor):
waveform = torch.from_numpy(waveform).to(device, torch.float32)
window = torch.hann_window(self.n_fft, device=device)
# Note: it would be better to dither the chunked waveform,
# so overlapping signal does not get the same dithering.
# But, chunking is happening inside pytorch, so it is here.
if self.dither != 0.0:
waveform += self.dither * torch.randn(waveform.shape, dtype=waveform.dtype, device=waveform.device)
stft = torch.stft(waveform, self.n_fft, self.hop_length, window=window, return_complex=True)
magnitudes = stft[..., :-1].abs() ** 2
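A short sketch of the new `dither` option on `WhisperFeatureExtractor` introduced above; the value follows the docstring's suggestion for [-1, +1] raw speech, and the silent input is an illustrative assumption:

import numpy as np
from transformers import WhisperFeatureExtractor

feature_extractor = WhisperFeatureExtractor(dither=0.0001)  # 0.0 keeps the previous behaviour
audio = np.zeros(16000, dtype=np.float32)  # hard-zero segment, where dithering keeps log-mel values from collapsing
inputs = feature_extractor(audio, sampling_rate=16000, return_tensors="np")
print(inputs["input_features"].shape)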
@ -969,7 +969,7 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
if labels is not None:
# shift labels to the left and cut last logit token
labels = tf.concat(
[labels[:, 1:], tf.fill((labels.shape[0], 1), tf.cast(self.config.pad_token_id, labels.dtype))],
[labels[:, 1:], tf.fill((labels.shape[0], 1), tf.cast(-100, labels.dtype))],
axis=-1,
)
loss = self.hf_compute_loss(labels, lm_logits)
@ -691,33 +691,6 @@ class XGLMModel(XGLMPreTrainedModel):
)
def xglm_cross_entropy_loss(
logits,
labels,
num_items_in_batch: int = None,
ignore_index: int = -100,
pad_token_id: int = -100,
vocab_size: int = None,
):
"""
Loss function for XGLM that takes into account `num_items_in_batch`
"""
shift_labels = labels.new_zeros(labels.shape)
shift_labels[:, :-1] = labels[:, 1:].clone()
shift_labels[:, -1] = pad_token_id
# move labels to correct device to enable model parallelism
labels = labels.float().to(logits.device)
logits = logits.view(-1, vocab_size).float()
shift_labels = shift_labels.view(-1)
reduction = "sum" if num_items_in_batch is not None else "mean"
loss = nn.functional.cross_entropy(logits, shift_labels, ignore_index=ignore_index, reduction=reduction)
if reduction == "sum":
loss = loss / num_items_in_batch
return loss
@add_start_docstrings(
"""
The XGLM Model transformer with a language modeling head on top (linear layer with weights tied to the input
@ -737,8 +710,6 @@ class XGLMForCausalLM(XGLMPreTrainedModel, GenerationMixin):
# Initialize weights and apply final processing
self.post_init()
self._loss_function = xglm_cross_entropy_loss
def get_input_embeddings(self):
return self.model.embed_tokens
@ -14,6 +14,7 @@
# limitations under the License.

import enum
from collections.abc import Iterable  # pylint: disable=g-importing-member
from typing import Dict, List, Optional, Union

from ..processing_utils import ProcessingKwargs, Unpack
@ -71,6 +72,8 @@ def retrieve_images_in_messages(
    """
    if images is None:
        images = []
    elif not isinstance(images, Iterable):
        images = [images]
    idx_images = 0
    retrieved_images = []
    for message in messages:
@ -188,14 +191,15 @@ class ImageTextToTextPipeline(Pipeline):
        return_full_text=None,
        return_tensors=None,
        return_type=None,
        clean_up_tokenization_spaces=None,
        stop_sequence=None,
        continue_final_message=None,
        **kwargs: Unpack[ProcessingKwargs],
    ):
        forward_kwargs = {}
        preprocess_params = {}
        postprocess_params = {}

        preprocess_params["processing_kwargs"] = kwargs
        preprocess_params.update(kwargs)

        if timeout is not None:
            preprocess_params["timeout"] = timeout
@ -226,7 +230,16 @@ class ImageTextToTextPipeline(Pipeline):
            postprocess_params["return_type"] = return_type
        if continue_final_message is not None:
            postprocess_params["continue_final_message"] = continue_final_message

        if clean_up_tokenization_spaces is not None:
            postprocess_params["clean_up_tokenization_spaces"] = clean_up_tokenization_spaces
        if stop_sequence is not None:
            stop_sequence_ids = self.processor.tokenizer.encode(stop_sequence, add_special_tokens=False)
            if len(stop_sequence_ids) > 1:
                logger.warning_once(
                    "Stopping on a multiple token sequence is not yet supported on transformers. The first token of"
                    " the stop sequence will be used as the stop sequence string in the interim."
                )
            generate_kwargs["eos_token_id"] = stop_sequence_ids[0]
        return preprocess_params, forward_kwargs, postprocess_params

    def __call__(
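The `stop_sequence` handling above encodes the string without special tokens and, if it spans several tokens, uses only the first id as `eos_token_id` (with a warning). A small sketch of that fallback at the tokenizer level; the checkpoint is only illustrative:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative tokenizer

stop_sequence = "Observation:"
stop_sequence_ids = tokenizer.encode(stop_sequence, add_special_tokens=False)
print(stop_sequence_ids)                 # several ids -> only the first becomes eos_token_id
eos_token_id = stop_sequence_ids[0]
print(tokenizer.decode([eos_token_id]))  # generation stops on this prefix token only
```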
@ -264,6 +277,8 @@ class ImageTextToTextPipeline(Pipeline):
            return_full_text (`bool`, *optional*, defaults to `True`):
                If set to `False` only added text is returned, otherwise the full text is returned. Cannot be
                specified at the same time as `return_text`.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `True`):
                Whether or not to clean up the potential extra spaces in the text output.
            continue_final_message (`bool`, *optional*): This indicates that you want the model to continue the
                last message in the input chat rather than starting a new one, allowing you to "prefill" its response.
                By default this is `True` when the final message in the input chat has the `assistant` role and
@ -315,7 +330,7 @@ class ImageTextToTextPipeline(Pipeline):

        return super().__call__({"images": images, "text": text}, **kwargs)

    def preprocess(self, inputs=None, timeout=None, continue_final_message=None, processing_kwargs=None):
    def preprocess(self, inputs=None, timeout=None, continue_final_message=None, **processing_kwargs):
        # In case we only have text inputs
        if isinstance(inputs, (list, tuple, str)):
            images = None
@ -332,6 +347,7 @@ class ImageTextToTextPipeline(Pipeline):
                add_generation_prompt=not continue_final_message,
                continue_final_message=continue_final_message,
                return_tensors=self.framework,
                **processing_kwargs,
            )
            inputs_text = inputs
            images = inputs.images
@ -340,14 +356,14 @@ class ImageTextToTextPipeline(Pipeline):
            inputs_text = inputs["text"]
            images = inputs["images"]

        images = load_images(images)
        images = load_images(images, timeout=timeout)

        # if batched text inputs, we set padding to True unless specified otherwise
        if isinstance(text, (list, tuple)) and len(text) > 1:
            processing_kwargs.setdefault("padding", True)
        model_inputs = self.processor(
            images=images, text=text, return_tensors=self.framework, legacy=False, **processing_kwargs
        ).to(dtype=self.torch_dtype)
        model_inputs = self.processor(images=images, text=text, return_tensors=self.framework, **processing_kwargs).to(
            dtype=self.torch_dtype
        )

        model_inputs["text"] = inputs_text

@ -363,7 +379,9 @@ class ImageTextToTextPipeline(Pipeline):

        return {"generated_sequence": generated_sequence, "prompt_text": prompt_text, "input_ids": input_ids}

    def postprocess(self, model_outputs, return_type=ReturnType.FULL_TEXT, continue_final_message=None):
    def postprocess(
        self, model_outputs, return_type=ReturnType.FULL_TEXT, continue_final_message=None, **postprocess_kwargs
    ):
        input_texts = model_outputs["prompt_text"]
        input_texts = [input_texts] if isinstance(input_texts, (str, Chat)) else input_texts
        generated_sequence = model_outputs["generated_sequence"]
@ -375,8 +393,8 @@ class ImageTextToTextPipeline(Pipeline):
        ]

        # Decode inputs and outputs the same way to remove input text from generated text if present
        generated_texts = self.processor.post_process_image_text_to_text(generated_sequence)
        decoded_inputs = self.processor.post_process_image_text_to_text(input_ids)
        generated_texts = self.processor.post_process_image_text_to_text(generated_sequence, **postprocess_kwargs)
        decoded_inputs = self.processor.post_process_image_text_to_text(input_ids, **postprocess_kwargs)

        # Force consistent behavior for including the input text in the output
        if return_type in {ReturnType.NEW_TEXT, ReturnType.FULL_TEXT}:
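Taken together, these pipeline changes let processing kwargs (for example `padding`, applied by default for batched prompts) flow through to the processor and pass the call's `timeout` to `load_images`. A hedged end-to-end sketch; the checkpoint, image URL, and generation length are illustrative and not part of the diff:

```python
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")  # illustrative checkpoint

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"},
            {"type": "text", "text": "Describe this image in one sentence."},
        ],
    }
]

# `timeout` applies when the image URL is downloaded; `max_new_tokens` is forwarded to generate.
out = pipe(text=messages, timeout=10.0, max_new_tokens=30)
print(out[0]["generated_text"])
```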
@ -1392,7 +1392,7 @@ class ProcessorMixin(PushToHubMixin):
            return out["input_ids"]
        return prompt

    def post_process_image_text_to_text(self, generated_outputs):
    def post_process_image_text_to_text(self, generated_outputs, skip_special_tokens=True, **kwargs):
        """
        Post-process the output of a vlm to decode the text.

@ -1400,11 +1400,15 @@ class ProcessorMixin(PushToHubMixin):
            generated_outputs (`torch.Tensor` or `np.ndarray`):
                The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
                or `(sequence_length,)`.
            skip_special_tokens (`bool`, *optional*, defaults to `True`):
                Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
            **kwargs:
                Additional arguments to be passed to the tokenizer's `batch_decode` method.

        Returns:
            `List[str]`: The decoded text.
        """
        return self.tokenizer.batch_decode(generated_outputs, skip_special_tokens=True)
        return self.tokenizer.batch_decode(generated_outputs, skip_special_tokens=skip_special_tokens, **kwargs)


def _validate_images_text_input_order(images, text):
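With the extended signature above, decode-time options reach `tokenizer.batch_decode` directly. A minimal sketch using stand-in token ids; the checkpoint is illustrative, and real ids would come from `model.generate`:

```python
import torch
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("llava-hf/llava-interleave-qwen-0.5b-hf")  # illustrative checkpoint

generated_ids = torch.tensor([[1, 887, 526, 263, 8444, 20255, 2]])  # arbitrary stand-in ids

kept = processor.post_process_image_text_to_text(generated_ids, skip_special_tokens=False)
stripped = processor.post_process_image_text_to_text(generated_ids)  # default skip_special_tokens=True
print(kept[0], stripped[0], sep="\n")  # identical unless the ids include special tokens
```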