mirror of
https://github.com/huggingface/transformers.git
synced 2025-10-21 17:48:57 +08:00
Compare commits
12 Commits
update-qua
...
fix_gptq_t
Author | SHA1 | Date | |
---|---|---|---|
120a0bd477 | |||
5e5aa3fad0 | |||
1945f91189 | |||
3a8eb74668 | |||
54be2d7ae8 | |||
286ffaaf0a | |||
861758e235 | |||
42b36d7395 | |||
597efd21d2 | |||
d9e6f307e7 | |||
1867be666d | |||
6a912ff2c5 |
642
.github/workflows/build-docker-images.yml
vendored
642
.github/workflows/build-docker-images.yml
vendored
@ -3,7 +3,7 @@ name: Build docker images (scheduled)
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- update-quantization-docker
|
||||
- build_ci_docker_image*
|
||||
repository_dispatch:
|
||||
workflow_call:
|
||||
inputs:
|
||||
@ -18,341 +18,341 @@ concurrency:
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# latest-docker:
|
||||
# name: "Latest PyTorch + TensorFlow [dev]"
|
||||
# runs-on:
|
||||
# group: aws-general-8-plus
|
||||
# steps:
|
||||
# -
|
||||
# name: Set up Docker Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# -
|
||||
# name: Check out code
|
||||
# uses: actions/checkout@v4
|
||||
# -
|
||||
# name: Login to DockerHub
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# -
|
||||
# name: Build and push
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-all-latest-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
|
||||
# # Push CI images still need to be re-built daily
|
||||
# -
|
||||
# name: Build and push (for Push CI) in a daily basis
|
||||
# # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
|
||||
# if: inputs.image_postfix != '-push-ci'
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-all-latest-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-all-latest-gpu-push-ci
|
||||
latest-docker:
|
||||
name: "Latest PyTorch + TensorFlow [dev]"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-all-latest-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
|
||||
# Push CI images still need to be re-built daily
|
||||
-
|
||||
name: Build and push (for Push CI) in a daily basis
|
||||
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-all-latest-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-all-latest-gpu-push-ci
|
||||
|
||||
# - name: Post to Slack
|
||||
# if: always()
|
||||
# uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
# with:
|
||||
# slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
# title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
|
||||
# status: ${{ job.status }}
|
||||
# slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
# latest-torch-deepspeed-docker:
|
||||
# name: "Latest PyTorch + DeepSpeed"
|
||||
# runs-on:
|
||||
# group: aws-general-8-plus
|
||||
# steps:
|
||||
# -
|
||||
# name: Set up Docker Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# -
|
||||
# name: Check out code
|
||||
# uses: actions/checkout@v4
|
||||
# -
|
||||
# name: Login to DockerHub
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# -
|
||||
# name: Build and push
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-pytorch-deepspeed-latest-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
|
||||
latest-torch-deepspeed-docker:
|
||||
name: "Latest PyTorch + DeepSpeed"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-deepspeed-latest-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
|
||||
|
||||
# - name: Post to Slack
|
||||
# if: always()
|
||||
# uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
# with:
|
||||
# slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
|
||||
# title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
|
||||
# status: ${{ job.status }}
|
||||
# slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
# # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
|
||||
# latest-torch-deepspeed-docker-for-push-ci-daily-build:
|
||||
# name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
|
||||
# runs-on:
|
||||
# group: aws-general-8-plus
|
||||
# steps:
|
||||
# -
|
||||
# name: Set up Docker Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# -
|
||||
# name: Check out code
|
||||
# uses: actions/checkout@v4
|
||||
# -
|
||||
# name: Login to DockerHub
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# # Push CI images still need to be re-built daily
|
||||
# -
|
||||
# name: Build and push (for Push CI) in a daily basis
|
||||
# # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
|
||||
# if: inputs.image_postfix != '-push-ci'
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-pytorch-deepspeed-latest-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
|
||||
# Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
|
||||
latest-torch-deepspeed-docker-for-push-ci-daily-build:
|
||||
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# Push CI images still need to be re-built daily
|
||||
-
|
||||
name: Build and push (for Push CI) in a daily basis
|
||||
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-deepspeed-latest-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
|
||||
|
||||
# - name: Post to Slack
|
||||
# if: always()
|
||||
# uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
# with:
|
||||
# slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
# title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
|
||||
# status: ${{ job.status }}
|
||||
# slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
# doc-builder:
|
||||
# name: "Doc builder"
|
||||
# # Push CI doesn't need this image
|
||||
# if: inputs.image_postfix != '-push-ci'
|
||||
# runs-on:
|
||||
# group: aws-general-8-plus
|
||||
# steps:
|
||||
# -
|
||||
# name: Set up Docker Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# -
|
||||
# name: Check out code
|
||||
# uses: actions/checkout@v4
|
||||
# -
|
||||
# name: Login to DockerHub
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# -
|
||||
# name: Build and push
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-doc-builder
|
||||
# push: true
|
||||
# tags: huggingface/transformers-doc-builder
|
||||
doc-builder:
|
||||
name: "Doc builder"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-doc-builder
|
||||
push: true
|
||||
tags: huggingface/transformers-doc-builder
|
||||
|
||||
# - name: Post to Slack
|
||||
# if: always()
|
||||
# uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
# with:
|
||||
# slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
# title: 🤗 Results of the huggingface/transformers-doc-builder docker build
|
||||
# status: ${{ job.status }}
|
||||
# slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-doc-builder docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
# latest-pytorch:
|
||||
# name: "Latest PyTorch [dev]"
|
||||
# # Push CI doesn't need this image
|
||||
# if: inputs.image_postfix != '-push-ci'
|
||||
# runs-on:
|
||||
# group: aws-general-8-plus
|
||||
# steps:
|
||||
# -
|
||||
# name: Set up Docker Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# -
|
||||
# name: Check out code
|
||||
# uses: actions/checkout@v4
|
||||
# -
|
||||
# name: Login to DockerHub
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# -
|
||||
# name: Build and push
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-pytorch-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-pytorch-gpu
|
||||
latest-pytorch:
|
||||
name: "Latest PyTorch [dev]"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-gpu
|
||||
|
||||
# - name: Post to Slack
|
||||
# if: always()
|
||||
# uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
# with:
|
||||
# slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
# title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
|
||||
# status: ${{ job.status }}
|
||||
# slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
# latest-pytorch-amd:
|
||||
# name: "Latest PyTorch (AMD) [dev]"
|
||||
# runs-on:
|
||||
# group: aws-general-8-plus
|
||||
# steps:
|
||||
# -
|
||||
# name: Set up Docker Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# -
|
||||
# name: Check out code
|
||||
# uses: actions/checkout@v4
|
||||
# -
|
||||
# name: Login to DockerHub
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# -
|
||||
# name: Build and push
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-pytorch-amd-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
|
||||
# # Push CI images still need to be re-built daily
|
||||
# -
|
||||
# name: Build and push (for Push CI) in a daily basis
|
||||
# # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
|
||||
# if: inputs.image_postfix != '-push-ci'
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-pytorch-amd-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-pytorch-amd-gpu-push-ci
|
||||
latest-pytorch-amd:
|
||||
name: "Latest PyTorch (AMD) [dev]"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-amd-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
|
||||
# Push CI images still need to be re-built daily
|
||||
-
|
||||
name: Build and push (for Push CI) in a daily basis
|
||||
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-amd-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-amd-gpu-push-ci
|
||||
|
||||
# - name: Post to Slack
|
||||
# if: always()
|
||||
# uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
# with:
|
||||
# slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
# title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
|
||||
# status: ${{ job.status }}
|
||||
# slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
# latest-tensorflow:
|
||||
# name: "Latest TensorFlow [dev]"
|
||||
# # Push CI doesn't need this image
|
||||
# if: inputs.image_postfix != '-push-ci'
|
||||
# runs-on:
|
||||
# group: aws-general-8-plus
|
||||
# steps:
|
||||
# -
|
||||
# name: Set up Docker Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# -
|
||||
# name: Check out code
|
||||
# uses: actions/checkout@v4
|
||||
# -
|
||||
# name: Login to DockerHub
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# -
|
||||
# name: Build and push
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-tensorflow-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-tensorflow-gpu
|
||||
latest-tensorflow:
|
||||
name: "Latest TensorFlow [dev]"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-tensorflow-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-tensorflow-gpu
|
||||
|
||||
# - name: Post to Slack
|
||||
# if: always()
|
||||
# uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
# with:
|
||||
# slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
# title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build
|
||||
# status: ${{ job.status }}
|
||||
# slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
# latest-pytorch-deepspeed-amd:
|
||||
# name: "PyTorch + DeepSpeed (AMD) [dev]"
|
||||
# runs-on:
|
||||
# group: aws-general-8-plus
|
||||
# steps:
|
||||
# -
|
||||
# name: Set up Docker Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# -
|
||||
# name: Check out code
|
||||
# uses: actions/checkout@v4
|
||||
# -
|
||||
# name: Login to DockerHub
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# -
|
||||
# name: Build and push
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-pytorch-deepspeed-amd-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
|
||||
# # Push CI images still need to be re-built daily
|
||||
# -
|
||||
# name: Build and push (for Push CI) in a daily basis
|
||||
# # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
|
||||
# if: inputs.image_postfix != '-push-ci'
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./docker/transformers-pytorch-deepspeed-amd-gpu
|
||||
# build-args: |
|
||||
# REF=main
|
||||
# push: true
|
||||
# tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
|
||||
latest-pytorch-deepspeed-amd:
|
||||
name: "PyTorch + DeepSpeed (AMD) [dev]"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-deepspeed-amd-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
|
||||
# Push CI images still need to be re-built daily
|
||||
-
|
||||
name: Build and push (for Push CI) in a daily basis
|
||||
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-deepspeed-amd-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
|
||||
|
||||
# - name: Post to Slack
|
||||
# if: always()
|
||||
# uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
# with:
|
||||
# slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
# title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
|
||||
# status: ${{ job.status }}
|
||||
# slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
latest-quantization-torch-docker:
|
||||
name: "Latest Pytorch + Quantization [dev]"
|
||||
|
104
.github/workflows/self-scheduled-caller.yml
vendored
104
.github/workflows/self-scheduled-caller.yml
vendored
@ -7,64 +7,64 @@ on:
|
||||
- cron: "17 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_scheduled_ci*
|
||||
- fix_gptq_tests
|
||||
|
||||
jobs:
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-models"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
secrets: inherit
|
||||
# model-ci:
|
||||
# name: Model CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_models_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-models"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-all-latest-gpu
|
||||
# ci_event: Daily CI
|
||||
# secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
name: Torch pipeline CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-pytorch-gpu
|
||||
ci_event: Daily CI
|
||||
secrets: inherit
|
||||
# torch-pipeline:
|
||||
# name: Torch pipeline CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_pipelines_torch_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-pipeline-torch"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-pytorch-gpu
|
||||
# ci_event: Daily CI
|
||||
# secrets: inherit
|
||||
|
||||
tf-pipeline:
|
||||
name: TF pipeline CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_pipelines_tf_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-pipeline-tf"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-tensorflow-gpu
|
||||
ci_event: Daily CI
|
||||
secrets: inherit
|
||||
# tf-pipeline:
|
||||
# name: TF pipeline CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_pipelines_tf_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-pipeline-tf"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-tensorflow-gpu
|
||||
# ci_event: Daily CI
|
||||
# secrets: inherit
|
||||
|
||||
example-ci:
|
||||
name: Example CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-examples"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
secrets: inherit
|
||||
# example-ci:
|
||||
# name: Example CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_examples_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-examples"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-all-latest-gpu
|
||||
# ci_event: Daily CI
|
||||
# secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
name: DeepSpeed CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-deepspeed"
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
ci_event: Daily CI
|
||||
working-directory-prefix: /workspace
|
||||
secrets: inherit
|
||||
# deepspeed-ci:
|
||||
# name: DeepSpeed CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_torch_cuda_extensions_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-deepspeed"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
# ci_event: Daily CI
|
||||
# working-directory-prefix: /workspace
|
||||
# secrets: inherit
|
||||
|
||||
quantization-ci:
|
||||
name: Quantization CI
|
||||
|
@ -0,0 +1,287 @@
|
||||
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
||||
# This file was automatically generated from examples/modular-transformers/modular_new_imgproc_model.py.
|
||||
# Do NOT edit this file manually as any edits will be overwritten by the generation of
|
||||
# the file from the modular. If any change should be done, please apply the change to the
|
||||
# modular_new_imgproc_model.py file directly. One of our CI enforces this.
|
||||
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
|
||||
from ...image_transforms import convert_to_rgb, resize, to_channel_dimension_format
|
||||
from ...image_utils import (
|
||||
OPENAI_CLIP_MEAN,
|
||||
OPENAI_CLIP_STD,
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
infer_channel_dimension_format,
|
||||
is_scaled_image,
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
import PIL
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
class ImgprocModelImageProcessor(BaseImageProcessor):
|
||||
r"""
|
||||
Constructs a NEW_IMGPROC_MODEL image processor.
|
||||
|
||||
Args:
|
||||
do_resize (`bool`, *optional*, defaults to `True`):
|
||||
Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
|
||||
`do_resize` parameter in the `preprocess` method.
|
||||
size (`dict`, *optional*, defaults to `{"height": 384, "width": 384}`):
|
||||
Size of the output image after resizing. Can be overridden by the `size` parameter in the `preprocess`
|
||||
method.
|
||||
resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
|
||||
Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`. Can be
|
||||
overridden by the `resample` parameter in the `preprocess` method.
|
||||
do_rescale (`bool`, *optional*, defaults to `True`):
|
||||
Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the
|
||||
`do_rescale` parameter in the `preprocess` method.
|
||||
rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
|
||||
Scale factor to use if rescaling the image. Only has an effect if `do_rescale` is set to `True`. Can be
|
||||
overridden by the `rescale_factor` parameter in the `preprocess` method.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
|
||||
method.
|
||||
image_mean (`float` or `List[float]`, *optional*, defaults to `OPENAI_CLIP_MEAN`):
|
||||
Mean to use if normalizing the image. This is a float or list of floats the length of the number of
|
||||
channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
|
||||
image_std (`float` or `List[float]`, *optional*, defaults to `OPENAI_CLIP_STD`):
|
||||
Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
|
||||
number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
|
||||
do_convert_rgb (`bool`, *optional*, defaults to `True`):
|
||||
Whether to convert the image to RGB.
|
||||
"""
|
||||
|
||||
model_input_names = ["pixel_values"]
|
||||
|
||||
def __init__(
    self,
    do_resize: bool = True,
    size: Optional[Dict[str, int]] = None,
    resample: PILImageResampling = PILImageResampling.BICUBIC,
    do_rescale: bool = True,
    rescale_factor: Union[int, float] = 1 / 255,
    do_normalize: bool = True,
    image_mean: Optional[Union[float, List[float]]] = None,
    image_std: Optional[Union[float, List[float]]] = None,
    do_convert_rgb: bool = True,
    **kwargs,
) -> None:
    """
    Store the default preprocessing options as attributes of the same name.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Stored as `self.do_resize`.
        size (`Dict[str, int]`, *optional*):
            Target size. When `None`, `{"height": 384, "width": 384}` is used. The value is passed through
            `get_size_dict(..., default_to_square=True)` before being stored as `self.size`.
        resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
            Stored as `self.resample`.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Stored as `self.do_rescale`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Stored as `self.rescale_factor`.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Stored as `self.do_normalize`.
        image_mean (`float` or `List[float]`, *optional*):
            Stored as `self.image_mean`. When `None`, `OPENAI_CLIP_MEAN` is used.
        image_std (`float` or `List[float]`, *optional*):
            Stored as `self.image_std`. When `None`, `OPENAI_CLIP_STD` is used.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Stored as `self.do_convert_rgb`.
        **kwargs:
            Forwarded unchanged to the parent class constructor.
    """
    super().__init__(**kwargs)

    # Fall back to a square 384x384 target when the caller gives no size.
    if size is None:
        size = {"height": 384, "width": 384}
    size = get_size_dict(size, default_to_square=True)

    # Resize options.
    self.do_resize = do_resize
    self.size = size
    self.resample = resample

    # Rescale options.
    self.do_rescale = do_rescale
    self.rescale_factor = rescale_factor

    # Normalization options; missing statistics fall back to the CLIP constants.
    self.do_normalize = do_normalize
    self.image_mean = OPENAI_CLIP_MEAN if image_mean is None else image_mean
    self.image_std = OPENAI_CLIP_STD if image_std is None else image_std

    self.do_convert_rgb = do_convert_rgb
|
||||
|
||||
def resize(
    self,
    image: np.ndarray,
    size: Dict[str, int],
    resample: PILImageResampling = PILImageResampling.BICUBIC,
    data_format: Optional[Union[str, ChannelDimension]] = None,
    input_data_format: Optional[Union[str, ChannelDimension]] = None,
    **kwargs,
) -> np.ndarray:
    """
    Resize `image` to the height and width given in `size`.

    Args:
        image (`np.ndarray`):
            The image to resize.
        size (`Dict[str, int]`):
            Output size as `{"height": int, "width": int}`.
        resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
            The `PILImageResampling` filter applied while resizing, e.g. `PILImageResampling.BICUBIC`.
        data_format (`ChannelDimension` or `str`, *optional*):
            Channel dimension format of the returned image. When unset, the format of the input image is
            used. One of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            Channel dimension format of `image`. When unset, it is inferred from the image. One of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

    Returns:
        `np.ndarray`: The resized image.

    Raises:
        ValueError: If the size dictionary lacks a `height` or `width` key.
    """
    size = get_size_dict(size)

    # Both dimensions are mandatory; reject anything else up front.
    has_both_dims = "height" in size and "width" in size
    if not has_both_dims:
        raise ValueError(f"The `size` dictionary must contain the keys `height` and `width`. Got {size.keys()}")

    target_hw = (size["height"], size["width"])
    # Delegates to the module-level `resize` helper, not to this method.
    resized = resize(
        image,
        size=target_hw,
        resample=resample,
        data_format=data_format,
        input_data_format=input_data_format,
        **kwargs,
    )
    return resized
|
||||
|
||||
@filter_out_non_signature_kwargs()
def preprocess(
    self,
    images: ImageInput,
    do_resize: Optional[bool] = None,
    size: Optional[Dict[str, int]] = None,
    resample: Optional[PILImageResampling] = None,
    do_rescale: Optional[bool] = None,
    rescale_factor: Optional[float] = None,
    do_normalize: Optional[bool] = None,
    image_mean: Optional[Union[float, List[float]]] = None,
    image_std: Optional[Union[float, List[float]]] = None,
    return_tensors: Optional[Union[str, TensorType]] = None,
    do_convert_rgb: Optional[bool] = None,
    data_format: ChannelDimension = ChannelDimension.FIRST,
    input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> BatchFeature:
    """
    Preprocess an image or batch of images.

    The steps run in this order, each one only when enabled: RGB conversion, resize, rescale, normalize.
    The images are then converted to `data_format` and wrapped in a `BatchFeature`.

    Args:
        images (`ImageInput`):
            Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
            passing in images with pixel values between 0 and 1, set `do_rescale=False`.
        do_resize (`bool`, *optional*, defaults to `self.do_resize`):
            Whether to resize the image.
        size (`Dict[str, int]`, *optional*, defaults to `self.size`):
            Size of the image after `resize`. Must resolve to a dictionary with `height` and `width` keys, as
            required by the `resize` method; each image is resized to `(size["height"], size["width"])`.
        resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
            Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`.
        do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
            Whether to rescale the image values between [0 - 1].
        rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
            Rescale factor to rescale the image by if `do_rescale` is set to `True`.
        do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
            Image mean to normalize the image by if `do_normalize` is set to `True`.
        image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
            Image standard deviation to normalize the image by if `do_normalize` is set to `True`.
        do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
            Whether to convert the image to RGB.
        return_tensors (`str` or `TensorType`, *optional*):
            The type of tensors to return. Can be one of:
                - Unset: Return a list of `np.ndarray`.
                - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
        data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
            The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - Unset: Use the channel dimension format of the input image.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format for the input image. If unset, the channel dimension format is inferred
            from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

    Returns:
        `BatchFeature`: Built from `{"pixel_values": images}` with `tensor_type=return_tensors`.

    Raises:
        ValueError: If `images` fails the `valid_images` check.
    """
    # Any argument left as `None` falls back to the value stored on the processor.
    do_resize = do_resize if do_resize is not None else self.do_resize
    resample = resample if resample is not None else self.resample
    do_rescale = do_rescale if do_rescale is not None else self.do_rescale
    rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
    do_normalize = do_normalize if do_normalize is not None else self.do_normalize
    image_mean = image_mean if image_mean is not None else self.image_mean
    image_std = image_std if image_std is not None else self.image_std
    do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb

    size = size if size is not None else self.size
    size = get_size_dict(size, default_to_square=False)

    images = make_list_of_images(images)

    if not valid_images(images):
        raise ValueError(
            "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
            "torch.Tensor, tf.Tensor or jax.ndarray."
        )

    # Check the resolved option combination before doing any work on the images.
    validate_preprocess_arguments(
        do_rescale=do_rescale,
        rescale_factor=rescale_factor,
        do_normalize=do_normalize,
        image_mean=image_mean,
        image_std=image_std,
        do_resize=do_resize,
        size=size,
        resample=resample,
    )
    # PIL RGBA images are converted to RGB
    if do_convert_rgb:
        images = [convert_to_rgb(image) for image in images]

    # All transformations expect numpy arrays.
    images = [to_numpy_array(image) for image in images]

    # Only the first image is inspected for the "already rescaled" warning.
    if is_scaled_image(images[0]) and do_rescale:
        logger.warning_once(
            "It looks like you are trying to rescale already rescaled images. If the input"
            " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
        )

    if input_data_format is None:
        # We assume that all images have the same channel dimension format.
        input_data_format = infer_channel_dimension_format(images[0])

    if do_resize:
        images = [
            self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
            for image in images
        ]

    if do_rescale:
        images = [
            self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
            for image in images
        ]

    if do_normalize:
        images = [
            self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
            for image in images
        ]

    # Convert to the requested output channel layout as the last step.
    images = [
        to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
    ]

    encoded_outputs = BatchFeature(data={"pixel_values": images}, tensor_type=return_tensors)

    return encoded_outputs
|
||||
|
||||
def new_image_processing_method(self, pixel_values: torch.FloatTensor):
    """Return `pixel_values` divided by 2."""
    halved = pixel_values / 2
    return halved
|
@ -0,0 +1,9 @@
|
||||
import torch
|
||||
import torch.utils.checkpoint
|
||||
|
||||
from transformers.models.blip.image_processing_blip import BlipImageProcessor
|
||||
|
||||
|
||||
class ImgprocModelImageProcessor(BlipImageProcessor):
    # Subclass of `BlipImageProcessor` that adds a single extra method.

    def new_image_processing_method(self, pixel_values: torch.FloatTensor):
        """Return `pixel_values` divided by 2."""
        halved = pixel_values / 2
        return halved
|
@ -15,6 +15,7 @@
|
||||
# limitations under the License.
|
||||
import copy
|
||||
import inspect
|
||||
import os
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
|
||||
@ -1029,10 +1030,6 @@ class GenerationMixin:
|
||||
"You have explicitly specified `forced_decoder_ids`. Please remove the `forced_decoder_ids` argument "
|
||||
"in favour of `input_ids` or `decoder_input_ids` respectively.",
|
||||
)
|
||||
if generation_config.watermarking_config is not None:
|
||||
processors.append(
|
||||
generation_config.watermarking_config.construct_processor(self.config.vocab_size, device)
|
||||
)
|
||||
|
||||
# TODO (joao): find a strategy to specify the order of the processors
|
||||
processors = self._merge_criteria_processor_list(processors, logits_processor)
|
||||
@ -1085,6 +1082,12 @@ class GenerationMixin:
|
||||
)
|
||||
)
|
||||
|
||||
# Watermarking should be after all logits processing is finished (see #34630)
|
||||
if generation_config.watermarking_config is not None:
|
||||
processors.append(
|
||||
generation_config.watermarking_config.construct_processor(self.config.vocab_size, device)
|
||||
)
|
||||
|
||||
# `LogitNormalization` should always be the last logit processor, when present
|
||||
if generation_config.renormalize_logits is True:
|
||||
processors.append(LogitNormalization())
|
||||
@ -3222,6 +3225,16 @@ class GenerationMixin:
|
||||
unfinished_sequences = torch.ones(batch_size, dtype=torch.long, device=input_ids.device)
|
||||
model_kwargs = self._get_initial_cache_position(input_ids, model_kwargs)
|
||||
|
||||
def model_forward(model, *args, **kwargs):
|
||||
return model.forward(*args, **kwargs)
|
||||
|
||||
if isinstance(model_kwargs.get("past_key_values"), StaticCache):
|
||||
if self.device.type == "cuda":
|
||||
logger.warning_once("Using `torch.compile`.")
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "0"
|
||||
model_forward = torch.compile(model_forward, mode="reduce-overhead", fullgraph=True)
|
||||
|
||||
i = 0
|
||||
while self._has_unfinished_sequences(
|
||||
this_peer_finished, synced_gpus, device=input_ids.device, cur_len=cur_len, max_length=max_length
|
||||
):
|
||||
@ -3232,8 +3245,11 @@ class GenerationMixin:
|
||||
model_inputs.update({"output_attentions": output_attentions} if output_attentions else {})
|
||||
model_inputs.update({"output_hidden_states": output_hidden_states} if output_hidden_states else {})
|
||||
|
||||
# forward pass to get next token
|
||||
outputs = self(**model_inputs, return_dict=True)
|
||||
if i == 0:
|
||||
outputs = self(**model_inputs, return_dict=True)
|
||||
i += 1
|
||||
else:
|
||||
outputs = model_forward(self, return_dict=True, **model_inputs)
|
||||
|
||||
# synced_gpus: don't waste resources running the code we don't need; kwargs must be updated before skipping
|
||||
model_kwargs = self._update_model_kwargs_for_generation(
|
||||
|
@ -52,7 +52,6 @@ from .pytorch_utils import ( # noqa: F401
|
||||
find_pruneable_heads_and_indices,
|
||||
id_tensor_storage,
|
||||
is_torch_greater_or_equal_than_1_13,
|
||||
is_torch_greater_or_equal_than_2_4,
|
||||
prune_conv1d_layer,
|
||||
prune_layer,
|
||||
prune_linear_layer,
|
||||
@ -90,6 +89,7 @@ from .utils import (
|
||||
is_peft_available,
|
||||
is_remote_url,
|
||||
is_safetensors_available,
|
||||
is_torch_greater_or_equal,
|
||||
is_torch_sdpa_available,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
@ -5032,7 +5032,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
||||
device_mesh (`torch.distributed.DeviceMesh`):
|
||||
The device mesh to use for tensor parallelism.
|
||||
"""
|
||||
if not is_torch_greater_or_equal_than_2_4:
|
||||
if not is_torch_greater_or_equal("2.5"):
|
||||
raise EnvironmentError("tensor parallel is only supported for `torch>=2.5`.")
|
||||
|
||||
# Tensor parallelize a nn.Module based on the `_tp_plan` attribute of the module.
|
||||
|
@ -38,6 +38,7 @@ from ...utils import TensorType, is_vision_available, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
MAX_IMAGE_SIZE = 4096 # 4k resolution as absolute maximum
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@ -116,7 +117,6 @@ def _resize_output_size_scale_below_upper_bound(
|
||||
def get_resize_output_image_size(
|
||||
image,
|
||||
resolution_max_side: int,
|
||||
max_image_size: int = 1820,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
) -> Tuple[int, int]:
|
||||
"""
|
||||
@ -126,24 +126,18 @@ def get_resize_output_image_size(
|
||||
Image to resize.
|
||||
resolution_max_side (`int`):
|
||||
The longest edge of the image will be resized to this value. The shortest edge will be resized to keep the
|
||||
input aspect ratio, with a lower bound of `min_image_size`.
|
||||
max_image_size (`int`, *optional*, defaults to 1820):
|
||||
Maximum image resolution. If the image is larger than this size, the longest edge will be resized to this
|
||||
value, with the shortest edge resized to keep the input aspect ratio, with a lower bound of `min_image_size`.
|
||||
input aspect ratio.
|
||||
input_data_format (`ChannelDimension` or `str`):
|
||||
The channel dimension format of the input image.
|
||||
Returns:
|
||||
The output size of the image after resizing.
|
||||
"""
|
||||
if resolution_max_side > max_image_size:
|
||||
raise ValueError("`resolution_max_side` cannot be larger than `max_image_size`")
|
||||
|
||||
height, width = get_image_size(image, channel_dim=input_data_format)
|
||||
|
||||
# Find the output size, when rescaling the longest edge to max_len and preserving the aspect ratio
|
||||
height, width = _resize_output_size_rescale_to_max_len(height, width, max_len=resolution_max_side)
|
||||
# Find the output size when scaling the image to be below the max_image_size
|
||||
height, width = _resize_output_size_scale_below_upper_bound(height, width, max_len=max_image_size)
|
||||
# Find the output size when scaling the image to be below the MAX_IMAGE_SIZE
|
||||
height, width = _resize_output_size_scale_below_upper_bound(height, width, max_len=MAX_IMAGE_SIZE)
|
||||
return height, width
|
||||
|
||||
|
||||
@ -251,7 +245,7 @@ def convert_to_rgb(
|
||||
data_format = input_data_format if data_format is None else data_format
|
||||
|
||||
mode = "P" if palette is not None else None
|
||||
image = to_pil_image(image, image_mode=mode)
|
||||
image = to_pil_image(image, image_mode=mode, input_data_format=input_data_format)
|
||||
if image.mode == "P" and palette is not None:
|
||||
image.putpalette(palette)
|
||||
|
||||
@ -404,7 +398,7 @@ class Idefics3ImageProcessor(BaseImageProcessor):
|
||||
image_mode = None
|
||||
if image.ndim == 2 or image.shape[-1] == 1:
|
||||
image_mode = "P"
|
||||
image = to_pil_image(image, image_mode=image_mode)
|
||||
image = to_pil_image(image, image_mode=image_mode, input_data_format=input_data_format)
|
||||
|
||||
resized_image = image.resize((size[1], size[0]), resample=resample)
|
||||
resized_image = np.array(resized_image)
|
||||
@ -754,6 +748,16 @@ class Idefics3ImageProcessor(BaseImageProcessor):
|
||||
# All transformations expect numpy arrays.
|
||||
images_list = [[to_numpy_array(image) for image in images] for images in images_list]
|
||||
|
||||
# Extra channel dimension for grayscale images
|
||||
if input_data_format in [ChannelDimension.LAST, None]:
|
||||
images_list = [
|
||||
[np.expand_dims(img, axis=-1) if img.ndim == 2 else img for img in images] for images in images_list
|
||||
]
|
||||
elif input_data_format == ChannelDimension.FIRST:
|
||||
images_list = [
|
||||
[np.expand_dims(img, axis=0) if img.ndim == 2 else img for img in images] for images in images_list
|
||||
]
|
||||
|
||||
if is_scaled_image(images_list[0][0]) and do_rescale:
|
||||
logger.warning_once(
|
||||
"It looks like you are trying to rescale already rescaled images. If the input"
|
||||
@ -764,18 +768,6 @@ class Idefics3ImageProcessor(BaseImageProcessor):
|
||||
if input_data_format is None:
|
||||
input_data_format = infer_channel_dimension_format(images_list[0][0], num_channels=(1, 3, 4))
|
||||
|
||||
# Extra channel dimension for grayscale images
|
||||
if input_data_format == ChannelDimension.LAST:
|
||||
images_list = [
|
||||
[np.expand_dims(img, axis=-1) if img.ndim == 2 else img for img in images] for images in images_list
|
||||
]
|
||||
elif input_data_format == ChannelDimension.FIRST:
|
||||
images_list = [
|
||||
[np.expand_dims(img, axis=0) if img.ndim == 2 else img for img in images] for images in images_list
|
||||
]
|
||||
else:
|
||||
raise ValueError(f"Invalid channel dimension format {input_data_format}.")
|
||||
|
||||
if do_resize:
|
||||
images_list = [
|
||||
[
|
||||
|
@ -21,7 +21,7 @@ from packaging import version
|
||||
from safetensors.torch import storage_ptr, storage_size
|
||||
from torch import nn
|
||||
|
||||
from .utils import is_torch_xla_available, logging
|
||||
from .utils import is_torch_greater_or_equal, is_torch_xla_available, logging
|
||||
|
||||
|
||||
ALL_LAYERNORM_LAYERS = [nn.LayerNorm]
|
||||
@ -39,7 +39,7 @@ is_torch_greater_or_equal_than_1_13 = parsed_torch_version_base >= version.parse
|
||||
is_torch_greater_or_equal_than_1_12 = parsed_torch_version_base >= version.parse("1.12")
|
||||
|
||||
|
||||
if is_torch_greater_or_equal_than_2_4:
|
||||
if is_torch_greater_or_equal("2.5"):
|
||||
from torch.distributed.tensor import Replicate
|
||||
from torch.distributed.tensor.parallel import (
|
||||
ColwiseParallel,
|
||||
|
@ -215,6 +215,9 @@ class HfQuantizer(ABC):
|
||||
|
||||
# Delete quantizer and quantization config
|
||||
del model.hf_quantizer
|
||||
del model.config.quantization_config
|
||||
del model.config._pre_quantization_dtype
|
||||
model.is_quantized = False
|
||||
|
||||
return model
|
||||
|
||||
|
@ -53,6 +53,20 @@ class EetqHfQuantizer(HfQuantizer):
|
||||
"Please install the latest version of eetq from : https://github.com/NetEase-FuXi/EETQ"
|
||||
)
|
||||
|
||||
try:
|
||||
import eetq # noqa: F401
|
||||
except ImportError as exc:
|
||||
if "shard_checkpoint" in str(exc):
|
||||
# EETQ 1.0.0 is currently broken with the latest transformers because it tries to import the removed
|
||||
# shard_checkpoint function, see https://github.com/NetEase-FuXi/EETQ/issues/34.
|
||||
# TODO: Update message once eetq releases a fix
|
||||
raise ImportError(
|
||||
"You are using a version of EETQ that is incompatible with the current transformers version. "
|
||||
"Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0."
|
||||
) from exc
|
||||
else:
|
||||
raise
|
||||
|
||||
if not is_accelerate_available():
|
||||
raise ImportError("Loading an EETQ quantized model requires accelerate (`pip install accelerate`)")
|
||||
|
||||
|
@ -1143,7 +1143,17 @@ def require_eetq(test_case):
|
||||
"""
|
||||
Decorator marking a test that requires eetq
|
||||
"""
|
||||
return unittest.skipUnless(is_eetq_available(), "test requires eetq")(test_case)
|
||||
eetq_available = is_eetq_available()
|
||||
if eetq_available:
|
||||
try:
|
||||
import eetq # noqa: F401
|
||||
except ImportError as exc:
|
||||
if "shard_checkpoint" in str(exc):
|
||||
# EETQ 1.0.0 is currently broken with the latest transformers because it tries to import the removed
|
||||
# shard_checkpoint function, see https://github.com/NetEase-FuXi/EETQ/issues/34.
|
||||
# TODO: Remove once eetq releases a fix and this release is used in CI
|
||||
eetq_available = False
|
||||
return unittest.skipUnless(eetq_available, "test requires eetq")(test_case)
|
||||
|
||||
|
||||
def require_av(test_case):
|
||||
@ -1184,9 +1194,19 @@ def require_tensorboard(test_case):
|
||||
|
||||
def require_auto_gptq(test_case):
|
||||
"""
|
||||
Decorator for auto_gptq dependency
|
||||
Decorator for auto_gptq dependency
|
||||
"""
|
||||
return unittest.skipUnless(is_auto_gptq_available(), "test requires auto-gptq")(test_case)
|
||||
eetq_available = is_eetq_available()
|
||||
if eetq_available:
|
||||
try:
|
||||
import eetq # noqa: F401
|
||||
except ImportError as exc:
|
||||
if "shard_checkpoint" in str(exc):
|
||||
# EETQ 1.0.0 is currently broken with the latest transformers because it tries to import the removed
|
||||
# shard_checkpoint function, see https://github.com/NetEase-FuXi/EETQ/issues/34.
|
||||
# TODO: Remove once eetq releases a fix and this release is used in CI
|
||||
eetq_available = False
|
||||
return unittest.skipUnless(is_auto_gptq_available() and eetq_available, "test requires auto-gptq")(test_case)
|
||||
|
||||
|
||||
def require_auto_awq(test_case):
|
||||
|
@ -14,6 +14,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import collections
|
||||
import copy
|
||||
import gc
|
||||
import inspect
|
||||
@ -2450,6 +2451,58 @@ class UtilsFunctionsTest(unittest.TestCase):
|
||||
self.assertTrue(n_matches.item() == 2)
|
||||
self.assertTrue(validated_tokens.tolist()[0] == [1, 4, 8])
|
||||
|
||||
def test_speculative_sampling_target_distribution(self):
    """
    Asserts that the target distribution is preserved.
    Should help with catching issues like #32867.

    The first two candidate tokens (1 and 4) are expected to be accepted on every run. The third position is
    sampled 10,000 times and its empirical frequencies are compared against the ordering implied by
    `new_logits`.
    """
    # assume vocab size 10, input length 5 + 3 generated candidates
    candidate_input_ids = torch.tensor([[8, 0, 3, 9, 8, 1, 4, 5]])  # input tokens
    candidate_logits = torch.tensor(
        [
            [
                [-10.0, 10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0],  # generated 1
                [-10.0, -10.0, -10.0, -10.0, 10.0, -10.0, -10.0, -10.0, -10.0, -10.0],  # generated 4
                [-10.0, -10.0, -10.0, -10.0, -10.0, 10.0, -10.0, -10.0, -10.0, -10.0],  # generated 5
            ]
        ]
    )
    candidate_length = 3
    inf = float("inf")
    new_logits = torch.tensor(
        [
            [
                # accepts 1:
                [-inf, 10.0, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
                # accepts 4:
                [-inf, -inf, -inf, -inf, 10.0, -inf, -inf, -inf, -inf, -inf],
                # most likely to be 1 or 8, less likely to be 3, then 7, and should never be any other value:
                [-inf, 2.0, -inf, 1.0, -inf, -inf, -inf, -0.01, 2.0, -inf],
                # N/A:
                [-inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
            ]
        ]
    )
    last_assistant_token_is_eos = False
    last_validated_token = []
    for _ in range(10_000):
        validated_tokens, n_matches = _speculative_sampling(
            candidate_input_ids,
            candidate_logits,
            candidate_length,
            new_logits,
            last_assistant_token_is_eos,
        )
        # Convert once per iteration instead of once per assertion.
        tokens = validated_tokens.tolist()[0]
        # Specific assertions report the offending value when they fail.
        self.assertEqual(n_matches.item(), 2)
        self.assertEqual(tokens[0], 1)
        self.assertEqual(tokens[1], 4)
        self.assertIn(tokens[2], [1, 3, 7, 8])
        last_validated_token.append(tokens[2])
    # check that the most likely tokens are selected more often than the less likely ones
    last_token_counts = collections.Counter(last_validated_token)
    self.assertGreater(last_token_counts[1], last_token_counts[3])
    self.assertGreater(last_token_counts[3], last_token_counts[7])
    self.assertGreater(last_token_counts[7], 0)
    self.assertGreater(last_token_counts[8], last_token_counts[3])
|
||||
|
||||
|
||||
@pytest.mark.generate
|
||||
@require_torch
|
||||
|
@ -65,7 +65,7 @@ class BitNetTest(unittest.TestCase):
|
||||
"""
|
||||
Load the model
|
||||
"""
|
||||
cls.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
|
||||
cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_name)
|
||||
cls.quantized_model = AutoModelForCausalLM.from_pretrained(cls.model_name, device_map=cls.device)
|
||||
|
||||
def tearDown(self):
|
||||
|
@ -1192,7 +1192,7 @@ class ModularFileMapper(ModuleMapper):
|
||||
self.visited_modules = {}
|
||||
self.renamers = {}
|
||||
for file, module in self.model_specific_modules.items():
|
||||
file_model_name = re.search(r"models\.\w*?\.\w*?_(\S*)", file).groups()[0]
|
||||
file_model_name = file.split(".")[-2]
|
||||
renamer = ReplaceNameTransformer(
|
||||
file_model_name, self.model_name, self.given_old_name, self.given_new_name
|
||||
)
|
||||
|
Reference in New Issue
Block a user