Release: v4.3.1

Deprecate Wav2Vec2ForMaskedLM and add Wav2Vec2ForCTC (#10089 )
* add wav2vec2CTC and deprecate for maskedlm * remove from docs
2025-10-21 01:23:56 +08:00 · 2021-02-09 09:55:55 +01:00 · 2021-02-09 09:55:55 +01:00 · 2021-02-08 18:31:49 +01:00 · 2021-02-08 18:29:16 +01:00 · 2021-02-08 18:18:26 +01:00
5544 changed files with 202975 additions and 1724086 deletions
--- a/.circleci/TROUBLESHOOT.md
+++ b/.circleci/TROUBLESHOOT.md
@ -1,7 +0,0 @@
-# Troubleshooting
-
-This is a document explaining how to deal with various issues on Circle-CI. The entries may include actual solutions or pointers to Issues that cover those.
-
-## Circle CI
-
-* pytest worker runs out of resident RAM and gets killed by `cgroups`: https://github.com/huggingface/transformers/issues/11408
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -1,226 +1,435 @@
 version: 2.1
-setup: true
 orbs:
-    continuation: circleci/continuation@0.1.0
+    gcp-gke: circleci/gcp-gke@1.0.4
+    go: circleci/go@1.3.0
+
+
+# TPU REFERENCES
+references:
+    checkout_ml_testing: &checkout_ml_testing
+        run:
+            name: Checkout ml-testing-accelerators
+            command: |
+                git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
+                cd ml-testing-accelerators
+                git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
+                git checkout stable
+    build_push_docker: &build_push_docker
+        run:
+            name: Configure Docker
+            command: |
+                gcloud --quiet auth configure-docker
+                cd docker/transformers-pytorch-tpu
+                if [ -z "$CIRCLE_PR_NUMBER" ]; then docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" . ; else docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=pull/$CIRCLE_PR_NUMBER/head" . ; fi
+                docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
+    deploy_cluster: &deploy_cluster
+        run:
+            name: Deploy the job on the kubernetes cluster
+            command: |
+                go get github.com/google/go-jsonnet/cmd/jsonnet && \
+                export PATH=$PATH:$HOME/go/bin && \
+                kubectl create -f docker/transformers-pytorch-tpu/dataset.yaml || true && \
+                job_name=$(jsonnet -J ml-testing-accelerators/ docker/transformers-pytorch-tpu/bert-base-cased.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -) && \
+                job_name=${job_name#job.batch/} && \
+                job_name=${job_name% created} && \
+                echo "Waiting on kubernetes job: $job_name" && \
+                i=0 && \
+                # 30 checks spaced 30s apart = 900s total.
+                max_checks=30 && \
+                status_code=2 && \
+                # Check on the job periodically. Set the status code depending on what
+                # happened to the job in Kubernetes. If we try max_checks times and
+                # still the job hasn't finished, give up and return the starting
+                # non-zero status code.
+                while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
+                echo "Done waiting. Job status code: $status_code" && \
+                pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
+                echo "GKE pod name: $pod_name" && \
+                kubectl logs -f $pod_name --container=train
+                echo "Done with log retrieval attempt." && \
+                gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
+                exit $status_code
+    delete_gke_jobs: &delete_gke_jobs
+        run:
+            name: Delete GKE Jobs
+            command: |
+                # Select jobs whose age field ($4) contains an hour or day unit such as
+                # '1h' or '1d', i.e. jobs at least 1hr old. Print the matching rows for
+                # the build log, then delete them; `xargs -r` skips the delete if none match.
+                kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}'
+                kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}' | xargs -r kubectl delete job
+
+

-parameters:
-    nightly:
-        type: boolean
-        default: false
-    GHA_Actor:
-        type: string
-        default: ""
-    GHA_Action:
-        type: string
-        default: ""
-    GHA_Event:
-        type: string
-        default: ""
-    GHA_Meta:
-        type: string
-        default: ""

 jobs:
-    # Ensure running with CircleCI/huggingface
-    check_circleci_user:
-        docker:
-            - image: python:3.10-slim
-        resource_class: small
-        parallelism: 1
-        steps:
-            - run: echo $CIRCLE_PROJECT_USERNAME
-            - run: |
-                if [ "$CIRCLE_PROJECT_USERNAME" = "huggingface" ]; then
-                    exit 0
-                else
-                    echo "The CI is running under $CIRCLE_PROJECT_USERNAME personal account. Please follow https://support.circleci.com/hc/en-us/articles/360008097173-Troubleshooting-why-pull-requests-are-not-triggering-jobs-on-my-organization- to fix it."; exit -1
-                fi
-    # Fetch the tests to run
-    fetch_tests:
+    run_tests_torch_and_tf:
        working_directory: ~/transformers
        docker:
-            - image: huggingface/transformers-quality
+            - image: circleci/python:3.6
+        environment:
+            OMP_NUM_THREADS: 1
+        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
-            - run: uv pip install -U -e .
-            - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
-            - run: mkdir -p test_preparation
-            - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
-            - run: python utils/tests_fetcher.py --filter_tests
-            - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
-            - run: |
-                if [ ! -s test_preparation/generated_config.yml ]; then
-                    echo "No tests to run, exiting early!"
-                    circleci-agent step halt
-                fi
-
+            - restore_cache:
+                  keys:
+                      - v0.4-torch_and_tf-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install .[sklearn,tf-cpu,torch,testing,sentencepiece]
+            - run: pip install tapas torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cpu.html
+            - save_cache:
+                key: v0.4-{{ checksum "setup.py" }}
+                paths:
+                    - '~/.cache/pip'
+            - run: RUN_PT_TF_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_tf ./tests/ -m is_pt_tf_cross_test --durations=0 | tee tests_output.txt
            - store_artifacts:
-                path: test_preparation
-
-            - run:
-                name: "Retrieve Artifact Paths"
-                # [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
-                # `CIRCLE_TOKEN` is defined as an environment variables set within a context, see `https://circleci.com/docs/contexts/`
-                command: |
-                    project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
-                    job_number=${CIRCLE_BUILD_NUM}
-                    url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
-                    curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN"
-            - run:
-                name: "Prepare pipeline parameters"
-                command: |
-                    python utils/process_test_artifacts.py
-
-            # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
-            # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
-            # We used:
-
-            # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
-            # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
-
+                  path: ~/transformers/tests_output.txt
            - store_artifacts:
-                path: test_preparation/transformed_artifacts.json
-            - store_artifacts:
-                path: test_preparation/artifacts.json
-            - continuation/continue:
-                parameters:  test_preparation/transformed_artifacts.json
-                configuration_path: test_preparation/generated_config.yml
+                  path: ~/transformers/reports

-    # To run all tests for the nightly build
-    fetch_all_tests:
+    run_tests_torch:
        working_directory: ~/transformers
        docker:
-            - image: huggingface/transformers-quality
+            - image: circleci/python:3.7
+        environment:
+            OMP_NUM_THREADS: 1
+        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
-            - run: uv pip install -U -e .
-            - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
-            - run: mkdir -p test_preparation
-            - run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt
-            - run: python utils/tests_fetcher.py --filter_tests
-            - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
+            - restore_cache:
+                  keys:
+                      - v0.4-torch-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install .[sklearn,torch,testing,sentencepiece]
+            - run: pip install tapas torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cpu.html
+            - save_cache:
+                  key: v0.4-torch-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=tests_torch ./tests/ | tee tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
+
+    run_tests_tf:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.7
+        environment:
+            OMP_NUM_THREADS: 1
+        resource_class: xlarge
+        parallelism: 1
+        steps:
+            - checkout
+            - restore_cache:
+                  keys:
+                      - v0.4-tf-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
+            - save_cache:
+                  key: v0.4-tf-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_tf ./tests/ | tee tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
+
+    run_tests_flax:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.7
+        environment:
+            OMP_NUM_THREADS: 1
+        resource_class: xlarge
+        parallelism: 1
+        steps:
+            - checkout
+            - restore_cache:
+                keys:
+                    - v0.4-flax-{{ checksum "setup.py" }}
+                    - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: sudo pip install .[flax,sklearn,torch,testing,sentencepiece]
+            - save_cache:
+                  key: v0.4-flax-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_flax ./tests/ | tee tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
+
+    run_tests_pipelines_torch:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.7
+        environment:
+            OMP_NUM_THREADS: 1
+        resource_class: xlarge
+        parallelism: 1
+        steps:
+            - checkout
+            - restore_cache:
+                  keys:
+                      - v0.4-torch-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install .[sklearn,torch,testing,sentencepiece]
+            - run: pip install tapas torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cpu.html
+            - save_cache:
+                  key: v0.4-torch-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
+
+    run_tests_pipelines_tf:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.7
+        environment:
+            OMP_NUM_THREADS: 1
+        resource_class: xlarge
+        parallelism: 1
+        steps:
+            - checkout
+            - restore_cache:
+                  keys:
+                      - v0.4-tf-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
+            - save_cache:
+                  key: v0.4-tf-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
+
+    run_tests_custom_tokenizers:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.7
+        environment:
+            RUN_CUSTOM_TOKENIZERS: "yes"  # quoted: a bare `yes` is a YAML 1.1 boolean, not the string "yes"
+        steps:
+            - checkout
+            - restore_cache:
+                  keys:
+                      - v0.4-custom_tokenizers-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install .[ja,testing,sentencepiece]
+            - run: python -m unidic download
+            - save_cache:
+                  key: v0.4-custom_tokenizers-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
+
+    run_examples_torch:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.6
+        environment:
+            OMP_NUM_THREADS: 1
+        resource_class: xlarge
+        parallelism: 1
+        steps:
+            - checkout
+            - restore_cache:
+                  keys:
+                      - v0.4-torch_examples-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install .[sklearn,torch,sentencepiece,testing]
+            - run: pip install -r examples/_tests_requirements.txt
+            - save_cache:
+                  key: v0.4-torch_examples-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt
+            - store_artifacts:
+                  path: ~/transformers/examples_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
+
+    run_tests_git_lfs:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.7
+        resource_class: xlarge
+        parallelism: 1
+        steps:
+            - checkout
+            - run: sudo apt-get install git-lfs
            - run: |
-                if [ ! -s test_preparation/generated_config.yml ]; then
-                    echo "No tests to run, exiting early!"
-                    circleci-agent step halt
-                fi
+                git config --global user.email "ci@dummy.com"
+                git config --global user.name "ci"
+            - run: pip install --upgrade pip
+            - run: pip install .[testing]
+            - run: RUN_GIT_LFS_TESTS=1 python -m pytest -sv ./tests/test_hf_api.py -k "HfLargefilesTest"

+    build_doc:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.6
+        steps:
+            - checkout
+            - restore_cache:
+                  keys:
+                      - v0.4-build_doc-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install ."[all, docs]"
+            - save_cache:
+                  key: v0.4-build_doc-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: cd docs && make html SPHINXOPTS="-W"
            - store_artifacts:
-                path: test_preparation
+                path: ./docs/_build

-            - run:
-                name: "Retrieve Artifact Paths"
-                command: |
-                    project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
-                    job_number=${CIRCLE_BUILD_NUM}
-                    url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
-                    curl -o  test_preparation/artifacts.json ${url}
-            - run:
-                name: "Prepare pipeline parameters"
-                command: |
-                    python utils/process_test_artifacts.py
-
-            # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
-            # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
-            # We used:
-
-            # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
-            # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
-
-            - store_artifacts:
-                path: test_preparation/transformed_artifacts.json
-            - store_artifacts:
-                path: test_preparation/artifacts.json
-            - continuation/continue:
-                parameters:  test_preparation/transformed_artifacts.json
-                configuration_path: test_preparation/generated_config.yml
+    deploy_doc:
+        working_directory: ~/transformers
+        docker:
+            - image: circleci/python:3.6
+        steps:
+            - add_ssh_keys:
+                fingerprints:
+                    - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
+            - checkout
+            - restore_cache:
+                  keys:
+                      - v0.4-deploy_doc-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install ."[all,docs]"
+            - save_cache:
+                  key: v0.4-deploy_doc-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: ./.circleci/deploy.sh

    check_code_quality:
        working_directory: ~/transformers
        docker:
-            - image: huggingface/transformers-quality
-        resource_class: large
-        environment:
-            TRANSFORMERS_IS_CI: yes
-            PYTEST_TIMEOUT: 120
+            - image: circleci/python:3.6
+        resource_class: medium
        parallelism: 1
        steps:
            - checkout
-            - run: uv pip install -e ".[quality]"
-            - run:
-                name: Show installed libraries and their versions
-                command: pip freeze | tee installed.txt
-            - store_artifacts:
-                  path: ~/transformers/installed.txt
-            - run: python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
-            - run: ruff check examples tests src utils
-            - run: ruff format examples tests src utils --check
-            - run: python utils/custom_init_isort.py --check_only
-            - run: python utils/sort_auto_mappings.py --check_only
-            - run: python utils/check_doc_toc.py
-            - run: python utils/check_docstrings.py --check_all
+            - restore_cache:
+                  keys:
+                      - v0.4-code_quality-{{ checksum "setup.py" }}
+                      - v0.4-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install isort
+            - run: pip install .[all,quality]
+            - save_cache:
+                  key: v0.4-code_quality-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - run: black --check examples tests src utils
+            - run: isort --check-only examples tests src utils
+            - run: flake8 examples tests src utils
+            - run: python utils/style_doc.py src/transformers docs/source --max_len 119 --check_only
+            - run: python utils/check_copies.py
+            - run: python utils/check_table.py
+            - run: python utils/check_dummies.py
+            - run: python utils/check_repo.py

    check_repository_consistency:
        working_directory: ~/transformers
        docker:
-            - image: huggingface/transformers-consistency
-        resource_class: large
-        environment:
-            TRANSFORMERS_IS_CI: yes
-            PYTEST_TIMEOUT: 120
+            - image: circleci/python:3.6
+        resource_class: small
        parallelism: 1
        steps:
            - checkout
-            - run: uv pip install -e ".[quality]"
-            - run:
-                name: Show installed libraries and their versions
-                command: pip freeze | tee installed.txt
-            - store_artifacts:
-                  path: ~/transformers/installed.txt
-            - run: python utils/check_copies.py
-            - run: python utils/check_modular_conversion.py
-            - run: python utils/check_dummies.py
-            - run: python utils/check_repo.py
-            - run: python utils/check_inits.py
-            - run: python utils/check_config_docstrings.py
-            - run: python utils/check_config_attributes.py
-            - run: python utils/check_doctest_list.py
-            - run: make deps_table_check_updated
-            - run: python utils/update_metadata.py --check-only
-            - run: python utils/check_docstrings.py
+            - run: pip install requests
+            - run: python ./utils/link_tester.py

+# TPU JOBS
+    run_examples_tpu:
+        docker:
+            - image: circleci/python:3.6
+        environment:
+            OMP_NUM_THREADS: 1
+        resource_class: xlarge
+        parallelism: 1
+        steps:
+            - checkout
+            - go/install
+            - *checkout_ml_testing
+            - gcp-gke/install
+            - gcp-gke/update-kubeconfig-with-credentials:
+                  cluster: $GKE_CLUSTER
+                  perform-login: true
+            - setup_remote_docker
+            - *build_push_docker
+            - *deploy_cluster
+
+    cleanup-gke-jobs:
+        docker:
+            - image: circleci/python:3.6
+        steps:
+            - gcp-gke/install
+            - gcp-gke/update-kubeconfig-with-credentials:
+                  cluster: $GKE_CLUSTER
+                  perform-login: true
+            - *delete_gke_jobs
+
+workflow_filters: &workflow_filters
+    filters:
+        branches:
+            only:
+                - master
 workflows:
    version: 2
-    setup_and_quality:
-        when:
-            and:
-                - equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
-                - not: <<pipeline.parameters.nightly>>
+    build_and_test:
        jobs:
-            - check_circleci_user
            - check_code_quality
            - check_repository_consistency
-            - fetch_tests
-
-    setup_and_quality_2:
-        when:
-            not:
-                 equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
-        jobs:
-            - check_circleci_user
-            - check_code_quality
-            - check_repository_consistency
-            - fetch_tests:
-                # [reference] https://circleci.com/docs/contexts/
-                context:
-                    - TRANSFORMERS_CONTEXT
-
-    nightly:
-        when: <<pipeline.parameters.nightly>>
-        jobs:
-            - check_circleci_user
-            - check_code_quality
-            - check_repository_consistency
-            - fetch_all_tests
+            - run_examples_torch
+            - run_tests_custom_tokenizers
+            - run_tests_torch_and_tf
+            - run_tests_torch
+            - run_tests_tf
+            - run_tests_flax
+            - run_tests_pipelines_torch
+            - run_tests_pipelines_tf
+            - run_tests_git_lfs
+            - build_doc
+            - deploy_doc: *workflow_filters
+#    tpu_testing_jobs:
+#        triggers:
+#            - schedule:
+#                # Set to run once a day at 08:00 (minute 0 of hour 8).
+#                cron: "0 8 * * *"
+#                filters:
+#                    branches:
+#                        only:
+#                            - master
+#        jobs:
+#            - cleanup-gke-jobs
+#            - run_examples_tpu
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@ -1,428 +0,0 @@
-# coding=utf-8
-# Copyright 2022 The HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import copy
-import os
-import random
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
-import glob
-import yaml
-
-
-COMMON_ENV_VARIABLES = {
-    "OMP_NUM_THREADS": 1,
-    "TRANSFORMERS_IS_CI": True,
-    "PYTEST_TIMEOUT": 120,
-    "RUN_PIPELINE_TESTS": False,
-    # will be adjust in `CircleCIJob.to_dict`.
-    "RUN_FLAKY": True,
-}
-# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
-COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None}
-DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
-
-# Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures`
-# to rerun the tests that match these patterns.
-FLAKY_TEST_FAILURE_PATTERNS = [
-    "OSError",  # Machine/connection transient error
-    "Timeout",  # Machine/connection transient error
-    "ConnectionError",  # Connection transient error
-    "FileNotFoundError",  # Raised by `datasets` on Hub failures
-    "PIL.UnidentifiedImageError",  # Raised by `PIL.Image.open` on connection issues
-    "HTTPError",  # Also catches HfHubHTTPError
-    "AssertionError: Tensor-likes are not close!",  # `torch.testing.assert_close`, we might have unlucky random values
-    # TODO: error downloading tokenizer's `merged.txt` from hub can cause all the exceptions below. Throw and handle
-    # them under a single message.
-    "TypeError: expected str, bytes or os.PathLike object, not NoneType",
-    "TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType",
-    "Converting from Tiktoken failed",
-    "KeyError: <class ",
-    "TypeError: not a string",
-]
-
-
-class EmptyJob:
-    job_name = "empty"
-
-    def to_dict(self):
-        steps = [{"run": 'ls -la'}]
-        if self.job_name == "collection_job":
-            steps.extend(
-                [
-                    "checkout",
-                    {"run": "pip install requests || true"},
-                    {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
-                    {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
-                    {"store_artifacts": {"path": "outputs"}},
-                    {"run": 'echo "All required jobs have now completed"'},
-                ]
-            )
-
-        return {
-            "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
-            "resource_class": "small",
-            "steps": steps,
-        }
-
-
-@dataclass
-class CircleCIJob:
-    name: str
-    additional_env: Dict[str, Any] = None
-    docker_image: List[Dict[str, str]] = None
-    install_steps: List[str] = None
-    marker: Optional[str] = None
-    parallelism: Optional[int] = 0
-    pytest_num_workers: int = 8
-    pytest_options: Dict[str, Any] = None
-    resource_class: Optional[str] = "xlarge"
-    tests_to_run: Optional[List[str]] = None
-    num_test_files_per_worker: Optional[int] = 10
-    # This should be only used for doctest job!
-    command_timeout: Optional[int] = None
-
-    def __post_init__(self):
-        # Deal with defaults for mutable attributes.
-        if self.additional_env is None:
-            self.additional_env = {}
-        if self.docker_image is None:
-            # Let's avoid changing the default list and make a copy.
-            self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
-        else:
-            # BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
-            print(os.environ.get("GIT_COMMIT_MESSAGE"))
-            if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci":
-                self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
-            print(f"Using {self.docker_image} docker image")
-        if self.install_steps is None:
-            self.install_steps = ["uv venv && uv pip install ."]
-        if self.pytest_options is None:
-            self.pytest_options = {}
-        if isinstance(self.tests_to_run, str):
-            self.tests_to_run = [self.tests_to_run]
-        else:
-            test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt")
-            print("Looking for ", test_file)
-            if os.path.exists(test_file):
-                with open(test_file) as f:
-                    expanded_tests = f.read().strip().split("\n")
-                self.tests_to_run = expanded_tests
-                print("Found:", expanded_tests)
-            else:
-                self.tests_to_run = []
-                print("not Found")
-
-    def to_dict(self):
-        env = COMMON_ENV_VARIABLES.copy()
-        # Do not run tests decorated by @is_flaky on pull requests
-        env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
-        env.update(self.additional_env)
-
-        job = {
-            "docker": self.docker_image,
-            "environment": env,
-        }
-        if self.resource_class is not None:
-            job["resource_class"] = self.resource_class
-
-        all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
-        pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
-        pytest_flags.append(
-            f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
-        )
-                # Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues
-        timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
-        marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
-        junit_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
-        joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
-        repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
-        parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
-        steps = [
-            "checkout",
-            {"attach_workspace": {"at": "test_preparation"}},
-            {"run": "apt-get update && apt-get install -y curl"},
-            {"run": " && ".join(self.install_steps)},
-            {"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"},
-            {"run": {
-                    "name": "Show installed libraries and their size",
-                    "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}
-            },
-            {"run": {
-                "name": "Show installed libraries and their versions",
-                "command": """pip list --format=freeze | tee installed.txt || true"""}
-            },
-            {"run": {
-                "name": "Show biggest libraries",
-                "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
-            },
-            {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
-            {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
-                        {"run": {"name": "Split tests across parallel nodes: show current parallel tests",
-                    "command": f"TESTS=$(circleci tests split  --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
-                    }
-            },
-            {"run": {"name": "fetch hub objects before pytest", "command": "python3 utils/fetch_hub_objects_for_ci.py"}},
-            {"run": {
-                "name": "Run tests",
-                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
-            },
-            {"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
-            {"run": {"name": "Failed tests: show reasons",   "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
-            {"run": {"name": "Errors",                       "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
-            {"store_test_results": {"path": "test-results"}},
-            {"store_artifacts": {"path": "test-results/junit.xml"}},
-            {"store_artifacts": {"path": "reports"}},
-            {"store_artifacts": {"path": "tests.txt"}},
-            {"store_artifacts": {"path": "splitted_tests.txt"}},
-            {"store_artifacts": {"path": "installed.txt"}},
-        ]
-        if self.parallelism:
-            job["parallelism"] = parallel
-        job["steps"] = steps
-        return job
-
-    @property
-    def job_name(self):
-        return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}"
-
-
-# JOBS
-torch_job = CircleCIJob(
-    "torch",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    marker="not generate",
-    parallelism=6,
-)
-
-generate_job = CircleCIJob(
-    "generate",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    # networkx==3.3 (after #36957) cause some issues
-    # TODO: remove this once it works directly
-    install_steps=["uv venv && uv pip install ."],
-    marker="generate",
-    parallelism=6,
-)
-
-tokenization_job = CircleCIJob(
-    "tokenization",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    parallelism=8,
-)
-
-processor_job = CircleCIJob(
-    "processors",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    parallelism=8,
-)
-
-tf_job = CircleCIJob(
-    "tf",
-    docker_image=[{"image":"huggingface/transformers-tf-light"}],
-    parallelism=6,
-)
-
-
-flax_job = CircleCIJob(
-    "flax",
-    docker_image=[{"image":"huggingface/transformers-jax-light"}],
-    parallelism=6,
-    pytest_num_workers=16,
-    resource_class="2xlarge",
-)
-
-
-pipelines_torch_job = CircleCIJob(
-    "pipelines_torch",
-    additional_env={"RUN_PIPELINE_TESTS": True},
-    docker_image=[{"image":"huggingface/transformers-torch-light"}],
-    marker="is_pipeline_test",
-    parallelism=4,
-)
-
-
-pipelines_tf_job = CircleCIJob(
-    "pipelines_tf",
-    additional_env={"RUN_PIPELINE_TESTS": True},
-    docker_image=[{"image":"huggingface/transformers-tf-light"}],
-    marker="is_pipeline_test",
-    parallelism=4,
-)
-
-
-custom_tokenizers_job = CircleCIJob(
-    "custom_tokenizers",
-    additional_env={"RUN_CUSTOM_TOKENIZERS": True},
-    docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}],
-)
-
-
-examples_torch_job = CircleCIJob(
-    "examples_torch",
-    additional_env={"OMP_NUM_THREADS": 8},
-    docker_image=[{"image":"huggingface/transformers-examples-torch"}],
-    # TODO @ArthurZucker remove this once docker is easier to build
-    install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
-    pytest_num_workers=4,
-)
-
-
-examples_tensorflow_job = CircleCIJob(
-    "examples_tensorflow",
-    additional_env={"OMP_NUM_THREADS": 8},
-    docker_image=[{"image":"huggingface/transformers-examples-tf"}],
-    pytest_num_workers=2,
-)
-
-
-hub_job = CircleCIJob(
-    "hub",
-    additional_env={"HUGGINGFACE_CO_STAGING": True},
-    docker_image=[{"image":"huggingface/transformers-torch-light"}],
-    install_steps=[
-        'uv venv && uv pip install .',
-        'git config --global user.email "ci@dummy.com"',
-        'git config --global user.name "ci"',
-    ],
-    marker="is_staging_test",
-    pytest_num_workers=2,
-    resource_class="medium",
-)
-
-
-onnx_job = CircleCIJob(
-    "onnx",
-    docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
-    install_steps=[
-        "uv venv",
-        "uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]",
-    ],
-    pytest_options={"k onnx": None},
-    pytest_num_workers=1,
-    resource_class="small",
-)
-
-
-exotic_models_job = CircleCIJob(
-    "exotic_models",
-    docker_image=[{"image":"huggingface/transformers-exotic-models"}],
-    parallelism=4,
-    pytest_options={"durations": 100},
-)
-
-
-repo_utils_job = CircleCIJob(
-    "repo_utils",
-    docker_image=[{"image":"huggingface/transformers-consistency"}],
-    pytest_num_workers=4,
-    resource_class="large",
-)
-
-
-non_model_job = CircleCIJob(
-    "non_model",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    # networkx==3.3 (after #36957) cause some issues
-    # TODO: remove this once it works directly
-    install_steps=["uv venv && uv pip install ."],
-    marker="not generate",
-    parallelism=6,
-)
-
-
-# We also include a `dummy.py` file in the files to be doc-tested to prevent edge case failure. Otherwise, the pytest
-# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
-# the bash output redirection.)
-py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
-py_command = f"$(python3 -c '{py_command}')"
-command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'
-doc_test_job = CircleCIJob(
-    "pr_documentation_tests",
-    docker_image=[{"image":"huggingface/transformers-consistency"}],
-    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
-    install_steps=[
-        # Add an empty file to keep the test step running correctly even no file is selected to be tested.
-        "uv venv && pip install .",
-        "touch dummy.py",
-        command,
-        "cat pr_documentation_tests_temp.txt",
-        "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt"
-    ],
-    tests_to_run="$(cat pr_documentation_tests.txt)",  # noqa
-    pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None},
-    command_timeout=1200,  # test cannot run longer than 1200 seconds
-    pytest_num_workers=1,
-)
-
-REGULAR_TESTS = [torch_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
-EXAMPLES_TESTS = [examples_torch_job]
-PIPELINE_TESTS = [pipelines_torch_job]
-REPO_UTIL_TESTS = [repo_utils_job]
-DOC_TESTS = [doc_test_job]
-ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip
-
-
-def create_circleci_config(folder=None):
-    if folder is None:
-        folder = os.getcwd()
-    os.environ["test_preparation_dir"] = folder
-    jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation" , f"{k.job_name}_test_list.txt") )]
-    print("The following jobs will be run ", jobs)
-
-    if len(jobs) == 0:
-        jobs = [EmptyJob()]
-    else:
-        print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
-        # Add a job waiting all the test jobs and aggregate their test summary files at the end
-        collection_job = EmptyJob()
-        collection_job.job_name = "collection_job"
-        jobs = [collection_job] + jobs
-
-    config = {
-        "version": "2.1",
-        "parameters": {
-            # Only used to accept the parameters from the trigger
-            "nightly": {"type": "boolean", "default": False},
-            # Only used to accept the parameters from GitHub Actions trigger
-            "GHA_Actor": {"type": "string", "default": ""},
-            "GHA_Action": {"type": "string", "default": ""},
-            "GHA_Event": {"type": "string", "default": ""},
-            "GHA_Meta": {"type": "string", "default": ""},
-            "tests_to_run": {"type": "string", "default": ""},
-            **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
-            **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
-        },
-        "jobs": {j.job_name: j.to_dict() for j in jobs}
-    }
-    if "CIRCLE_TOKEN" in os.environ:
-        # For private forked repo. (e.g. new model addition)
-        config["workflows"] = {"version": 2, "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}}
-    else:
-        # For public repo. (e.g. `transformers`)
-        config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
-    with open(os.path.join(folder, "generated_config.yml"), "w") as f:
-        f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>"))
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--fetcher_folder", type=str, default=None, help="Only test that all tests and modules are accounted for."
-    )
-    args = parser.parse_args()
-
-    create_circleci_config(args.fetcher_folder)
--- a/.circleci/deploy.sh
+++ b/.circleci/deploy.sh
@ -0,0 +1,58 @@
+cd docs
+
+# deploy_doc COMMIT [FOLDER]
+#
+# Checks out COMMIT, then builds and/or uploads the Sphinx HTML docs to the
+# remote host "$doc" under the directory "$dir".
+#   $1  git ref to check out (commit hash or branch name)
+#   $2  optional destination folder below $dir; an empty/missing value selects
+#       the "stable" upload straight into $dir
+# NOTE(review): $doc and $dir are never assigned in this script; presumably they
+# come from the CI environment -- confirm.
+# The function runs in the current directory, which the script has already
+# changed to docs/ before the first call.
+function deploy_doc(){
+	echo "Creating doc at commit $1 and pushing to folder $2"
+	# NOTE(review): the checkout result is not checked; if it fails, the steps
+	# below run against whatever is currently checked out.
+	git checkout $1
+	# A non-empty folder argument means "master" or a versioned folder.
+	if [ ! -z "$2" ]
+	then
+		if [ "$2" == "master" ]; then
+		    echo "Pushing master"
+			# master is always rebuilt and re-uploaded.
+			make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir/$2/
+			# Keep a copy of the freshly built static assets in ./_static; the
+			# other branches below copy from that directory.
+			cp -r _build/html/_static .
+		# Remote test: does the folder for this version already exist on $doc?
+		elif ssh -oStrictHostKeyChecking=no $doc "[ -d $dir/$2 ]"; then
+			echo "Directory" $2 "already exists"
+			# No rebuild for an existing version: only its _static/ files are refreshed.
+			scp -r -oStrictHostKeyChecking=no _static/* $doc:$dir/$2/_static/
+		else
+			echo "Pushing version" $2
+			make clean && make html
+			# Replace the built _static directory with ./_static before uploading.
+			rm -rf _build/html/_static
+			cp -r _static _build/html
+			# Uploads the html directory itself as $dir/$2 (the folder does not exist yet).
+			scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2
+		fi
+	else
+		# No folder argument: this build is uploaded directly into $dir.
+		echo "Pushing stable"
+		make clean && make html
+		rm -rf _build/html/_static
+		cp -r _static _build/html
+		scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir
+	fi
+}
+
+# You can find the commit for each tag on https://github.com/huggingface/transformers/tags
+deploy_doc "master" master
+deploy_doc "b33a385" v1.0.0
+deploy_doc "fe02e45" v1.1.0
+deploy_doc "89fd345" v1.2.0
+deploy_doc "fc9faa8" v2.0.0
+deploy_doc "3ddce1d" v2.1.1
+deploy_doc "3616209" v2.2.0
+deploy_doc "d0f8b9a" v2.3.0
+deploy_doc "6664ea9" v2.4.0
+deploy_doc "fb560dc" v2.5.0
+deploy_doc "b90745c" v2.5.1
+deploy_doc "fbc5bf1" v2.6.0
+deploy_doc "6f5a12a" v2.7.0
+deploy_doc "11c3257" v2.8.0
+deploy_doc "e7cfc1a" v2.9.0
+deploy_doc "7cb203f" v2.9.1
+deploy_doc "10d7239" v2.10.0 
+deploy_doc "b42586e" v2.11.0
+deploy_doc "7fb8bdf" v3.0.2
+deploy_doc "4b3ee9c" v3.1.0
+deploy_doc "3ebb1b3" v3.2.0
+deploy_doc "0613f05" v3.3.1
+deploy_doc "eb0e0ce" v3.4.0
+deploy_doc "818878d" v3.5.1
+deploy_doc "c781171" v4.0.0
+deploy_doc "bfa4ccf" v4.1.1
+deploy_doc "7d9a9d0" # v4.2.0 Latest stable release
--- a/.circleci/parse_test_outputs.py
+++ b/.circleci/parse_test_outputs.py
@ -1,70 +0,0 @@
-import re
-import argparse
-
-def parse_pytest_output(file_path):
-    skipped_tests = {}
-    skipped_count = 0
-    with open(file_path, 'r') as file:
-        for line in file:
-            match = re.match(r'^SKIPPED \[(\d+)\] (tests/.*): (.*)$', line)
-            if match:
-                skipped_count += 1
-                test_file, test_line, reason = match.groups()
-                skipped_tests[reason] = skipped_tests.get(reason, []) + [(test_file, test_line)]
-    for k,v in sorted(skipped_tests.items(), key=lambda x:len(x[1])):
-        print(f"{len(v):4} skipped because: {k}")
-    print("Number of skipped tests:", skipped_count)
-
-def parse_pytest_failure_output(file_path):
-    failed_tests = {}
-    failed_count = 0
-    with open(file_path, 'r') as file:
-        for line in file:
-            match = re.match(r'^FAILED (tests/.*) - (.*): (.*)$', line)
-            if match:
-                failed_count += 1
-                _, error, reason = match.groups()
-                failed_tests[reason] = failed_tests.get(reason, []) + [error]
-    for k,v in sorted(failed_tests.items(), key=lambda x:len(x[1])):
-        print(f"{len(v):4} failed because `{v[0]}` -> {k}")
-    print("Number of failed tests:", failed_count)
-    if failed_count>0:
-        exit(1)
-
-def parse_pytest_errors_output(file_path):
-    print(file_path)
-    error_tests = {}
-    error_count = 0
-    with open(file_path, 'r') as file:
-        for line in file:
-            match = re.match(r'^ERROR (tests/.*) - (.*): (.*)$', line)
-            if match:
-                error_count += 1
-                _, test_error, reason = match.groups()
-                error_tests[reason] = error_tests.get(reason, []) + [test_error]
-    for k,v in sorted(error_tests.items(), key=lambda x:len(x[1])):
-        print(f"{len(v):4} errored out because of `{v[0]}` -> {k}")
-    print("Number of errors:", error_count)
-    if error_count>0:
-        exit(1)
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--file", help="file to parse")
-    parser.add_argument("--skip", action="store_true", help="show skipped reasons")
-    parser.add_argument("--fail", action="store_true", help="show failed tests")
-    parser.add_argument("--errors", action="store_true", help="show failed tests")
-    args = parser.parse_args()
-
-    if args.skip:
-        parse_pytest_output(args.file)
-
-    if args.fail:
-        parse_pytest_failure_output(args.file)
-
-    if args.errors:
-        parse_pytest_errors_output(args.file)
-
-
-if __name__ == "__main__":
-    main()
--- a/.coveragerc
+++ b/.coveragerc
@ -0,0 +1,12 @@
+[run]
+source=transformers
+omit =
+    # skip conversion scripts from testing for now
+    */convert_*
+    */__main__.py
+[report]
+exclude_lines =
+    pragma: no cover
+    raise
+    except
+    register_parameter
--- a/.gitattributes
+++ b/.gitattributes
@ -1,4 +0,0 @@
-*.py	eol=lf
-*.rst	eol=lf
-*.md	eol=lf
-*.mdx   eol=lf
--- a/.github/ISSUE_TEMPLATE/---new-benchmark.md
+++ b/.github/ISSUE_TEMPLATE/---new-benchmark.md
@ -0,0 +1,22 @@
+---
+name: "\U0001F5A5 New benchmark"
+about: Benchmark a part of this library and share your results
+title: "[Benchmark]"
+labels: ''
+assignees: ''
+
+---
+
+# 🖥 Benchmarking `transformers`
+
+## Benchmark
+
+Which part of `transformers` did you benchmark?
+
+## Set-up
+
+What did you run your benchmarks on? Please include details, such as: CPU, GPU? If using multiple GPUs, which parallelization did you use?
+
+## Results
+
+Put your results here!
--- a/.github/ISSUE_TEMPLATE/--new-model-addition.md
+++ b/.github/ISSUE_TEMPLATE/--new-model-addition.md
@ -0,0 +1,20 @@
+---
+name: "\U0001F31F New model addition"
+about: Submit a proposal/request to implement a new Transformer-based model
+title: ''
+labels: New model
+assignees: ''
+
+---
+
+# 🌟 New model addition
+
+## Model description
+
+<!-- Important information -->
+
+## Open source status
+
+* [ ] the model implementation is available: (give details)
+* [ ] the model weights are available: (give details)
+* [ ] who are the authors: (mention them, if possible by @gh-username)
--- a/.github/ISSUE_TEMPLATE/bug-report.md
+++ b/.github/ISSUE_TEMPLATE/bug-report.md
@ -0,0 +1,90 @@
+---
+name: "\U0001F41B Bug Report"
+about: Submit a bug report to help us improve transformers
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+
+## Environment info
+<!-- You can run the command `transformers-cli env` and copy-and-paste its output below.
+     Don't forget to fill out the missing fields in that output! -->
+
+- `transformers` version:
+- Platform:
+- Python version:
+- PyTorch version (GPU?):
+- Tensorflow version (GPU?):
+- Using GPU in script?:
+- Using distributed or parallel set-up in script?:
+
+### Who can help
+<!-- Your issue will be replied to more quickly if you can figure out the right person to tag with @
+ If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
+ Please tag fewer than 3 people.
+
+Models:
+
+- albert, bert, xlm: @LysandreJik
+- blenderbot, bart, marian, pegasus, encoderdecoder,  t5: @patrickvonplaten, @patil-suraj
+- longformer, reformer, transfoxl, xlnet: @patrickvonplaten
+- fsmt: @stas00
+- funnel: @sgugger
+- gpt2: @patrickvonplaten, @LysandreJik
+- rag: @patrickvonplaten, @lhoestq
+- tensorflow: @jplu
+
+Library:
+
+- benchmarks: @patrickvonplaten
+- deepspeed: @stas00
+- ray/raytune: @richardliaw, @amogkam
+- text generation: @patrickvonplaten
+- tokenizers: @n1t0, @LysandreJik
+- trainer: @sgugger
+- pipelines: @LysandreJik
+
+Documentation: @sgugger
+
+HF projects:
+
+- nlp datasets: [different repo](https://github.com/huggingface/nlp)
+- rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
+
+Examples:
+
+- maintained examples (not research project or legacy): @sgugger, @patil-suraj
+- research_projects/bert-loses-patience: @JetRunner
+- research_projects/distillation: @VictorSanh
+
+ -->
+
+## Information
+
+Model I am using (Bert, XLNet ...):
+
+The problem arises when using:
+* [ ] the official example scripts: (give details below)
+* [ ] my own modified scripts: (give details below)
+
+The task I am working on is:
+* [ ] an official GLUE/SQuAD task: (give the name)
+* [ ] my own task or dataset: (give details below)
+
+## To reproduce
+
+Steps to reproduce the behavior:
+
+1.
+2.
+3.
+
+<!-- If you have code snippets, error messages, stack traces please provide them here as well.
+     Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
+     Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.-->
+
+## Expected behavior
+
+<!-- A clear and concise description of what you would expect to happen. -->
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@ -1,134 +0,0 @@
-name: "\U0001F41B Bug Report"
-description: Submit a bug report to help us improve transformers
-labels: [ "bug" ]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for taking the time to fill out this bug report! 🤗
-
-        Before you submit your bug report:
-
-          - If it is your first time submitting, be sure to check our [bug report guidelines](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#did-you-find-a-bug)
-          - Try our [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat) -- it might be able to help you with your issue
-
-  - type: textarea
-    id: system-info
-    attributes:
-      label: System Info
-      description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
-      placeholder: transformers version, platform, python version, ...
-    validations:
-      required: true
-
-  - type: textarea
-    id: who-can-help
-    attributes:
-      label: Who can help?
-      description: |
-        Your issue will be replied to more quickly if you can figure out the right person to tag with @
-        If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
-
-        All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and
-        a core maintainer will ping the right person.
-
-        Please tag fewer than 3 people.
-
-        Models:
-
-          - text models: @ArthurZucker
-          - vision models: @amyeroberts, @qubvel
-          - speech models: @eustlb
-          - graph models: @clefourrier
-
-        Library:
-
-          - flax: @gante and @Rocketknight1
-          - generate: @zucchini-nlp (visual-language models) or @gante (all others)
-          - pipelines: @Rocketknight1
-          - tensorflow: @gante and @Rocketknight1
-          - tokenizers: @ArthurZucker and @itazap
-          - trainer: @zach-huggingface @SunMarc
-
-        Integrations:
-
-          - deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface
-          - ray/raytune: @richardliaw, @amogkam
-          - Big Model Inference: @SunMarc
-          - quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
-        
-        Devices/Backends:
-        
-          - AMD ROCm: @ivarflakstad
-          - Intel XPU: @IlyasMoutawwakil
-          - Ascend NPU: @ivarflakstad 
-
-        Documentation: @stevhliu
-
-        Model hub:
-
-          - for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator.
-
-        HF projects:
-
-          - accelerate: [different repo](https://github.com/huggingface/accelerate)
-          - datasets: [different repo](https://github.com/huggingface/datasets)
-          - diffusers: [different repo](https://github.com/huggingface/diffusers)
-          - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
-
-        Maintained examples (not research project or legacy):
-
-          - Flax: @Rocketknight1
-          - PyTorch: See Models above and tag the person corresponding to the modality of the example.
-          - TensorFlow: @Rocketknight1
-
-        Research projects are not maintained and should be taken as is.
-
-      placeholder: "@Username ..."
-
-  - type: checkboxes
-    id: information-scripts-examples
-    attributes:
-      label: Information
-      description: 'The problem arises when using:'
-      options:
-        - label: "The official example scripts"
-        - label: "My own modified scripts"
-
-  - type: checkboxes
-    id: information-tasks
-    attributes:
-      label: Tasks
-      description: "The tasks I am working on are:"
-      options:
-        - label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)"
-        - label: "My own task or dataset (give details below)"
-
-  - type: textarea
-    id: reproduction
-    validations:
-      required: true
-    attributes:
-      label: Reproduction
-      description: |
-        Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
-        Please include relevant config information with your code, for example your Trainers, TRL, Peft, and DeepSpeed configs.
-        If you have code snippets, error messages, stack traces please provide them here as well.
-        Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
-        Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
-
-      placeholder: |
-        Steps to reproduce the behavior:
-
-          1.
-          2.
-          3.
-
-
-  - type: textarea
-    id: expected-behavior
-    validations:
-      required: true
-    attributes:
-      label: Expected behavior
-      description: "A clear and concise description of what you would expect to happen."
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@ -1,12 +0,0 @@
-blank_issues_enabled: true
-version: 2.1
-contact_links:
-  - name: Model checkpoints on the Hugging Face Hub
-    url: https://huggingface.co/models
-    about: Open a Pull request / Discussion related to a specific model checkpoint directly on the Hugging Face Hub
-  - name: Website Related
-    url: https://github.com/huggingface/hub-docs/issues
-    about: Feature requests and bug reports related to the website
-  - name: Forum
-    url: https://discuss.huggingface.co/
-    about: General usage questions and community discussions
--- a/.github/ISSUE_TEMPLATE/feature-request.md
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@ -0,0 +1,25 @@
+---
+name: "\U0001F680 Feature request"
+about: Submit a proposal/request for a new transformers feature
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+# 🚀 Feature request
+
+<!-- A clear and concise description of the feature proposal.
+     Please provide a link to the paper and code in case they exist. -->
+
+## Motivation
+
+<!-- Please outline the motivation for the proposal. Is your feature request
+     related to a problem? e.g., I'm always frustrated when [...]. If this is related
+     to another GitHub issue, please link here too. -->
+
+## Your contribution
+
+<!-- Is there any way that you could help, e.g. by submitting a PR?
+     Make sure to read the CONTRIBUTING.MD readme:
+     https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md -->
--- a/.github/ISSUE_TEMPLATE/feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@ -1,31 +0,0 @@
-name: "\U0001F680 Feature request"
-description: Submit a proposal/request for a new transformers feature
-labels: [ "Feature request" ]
-body:
-  - type: textarea
-    id: feature-request
-    validations:
-      required: true
-    attributes:
-      label: Feature request
-      description: |
-        A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist.
-
-  - type: textarea
-    id: motivation
-    validations:
-      required: true
-    attributes:
-      label: Motivation
-      description: |
-        Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
-
-
-  - type: textarea
-    id: contribution
-    validations:
-      required: true
-    attributes:
-      label: Your contribution
-      description: |
-        Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md)
--- a/.github/ISSUE_TEMPLATE/i18n.md
+++ b/.github/ISSUE_TEMPLATE/i18n.md
@ -1,46 +0,0 @@
---
-name: 🌐 Translating a new language?
-about: Start a new translation effort in your language
-title: '[i18n-<languageCode>] Translating docs to <languageName>'
-labels: WIP
-assignees: ''
-
---
-
-<!--
-Note: Please search to see if an issue already exists for the language you are trying to translate.
-->
-
-Hi!
-
-Let's bring the documentation to all the <languageName>-speaking community 🌐 (currently 0 out of 267 complete)
-
-Who would want to translate? Please follow the 🤗 [TRANSLATING guide](https://github.com/huggingface/transformers/blob/main/docs/TRANSLATING.md). Here is a list of the files ready for translation. Let us know in this issue if you'd like to translate any, and we'll add your name to the list.
-
-Some notes:
-
-* Please translate using an informal tone (imagine you are talking with a friend about transformers 🤗).
-* Please translate in a gender-neutral way.
-* Add your translations to the folder called `<languageCode>` inside the [source folder](https://github.com/huggingface/transformers/tree/main/docs/source).
-* Register your translation in `<languageCode>/_toctree.yml`; please follow the order of the [English version](https://github.com/huggingface/transformers/blob/main/docs/source/en/_toctree.yml).
-* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @stevhliu for review.
-* 🙋 If you'd like others to help you with the translation, you can also post in the 🤗 [forums](https://discuss.huggingface.co/).
-
-## Get Started section
-
- [ ] [index.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/index.md) https://github.com/huggingface/transformers/pull/20180
- [ ] [quicktour.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/quicktour.md) (waiting for initial PR to go through)
- [ ] [installation.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/installation.md).
-
-## Tutorial section
- [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md)
- [ ]  [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/autoclass_tutorial.md)
- [ ]  [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md)
- [ ]  [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md)
- [ ]  [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md)
- [ ]  [model_sharing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/model_sharing.md)
- [ ]  [multilingual.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/multilingual.md)
-
-<!--
-Keep on adding more as you go 🔥
-->
--- a/.github/ISSUE_TEMPLATE/migration.md
+++ b/.github/ISSUE_TEMPLATE/migration.md
@ -0,0 +1,58 @@
+---
+name: "\U0001F4DA Migration from pytorch-pretrained-bert or pytorch-transformers"
+about: Report a problem when migrating from pytorch-pretrained-bert or pytorch-transformers
+  to transformers
+title: ''
+labels: Migration
+assignees: ''
+
+---
+
+# 📚 Migration
+
+## Information
+
+<!-- Important information -->
+
+Model I am using (Bert, XLNet ...):
+
+Language I am using the model on (English, Chinese ...):
+
+The problem arises when using:
+* [ ] the official example scripts: (give details below)
+* [ ] my own modified scripts: (give details below)
+
+The tasks I am working on are:
+* [ ] an official GLUE/SQuAD task: (give the name)
+* [ ] my own task or dataset: (give details below)
+
+## Details
+
+<!-- A clear and concise description of the migration issue.
+    If you have code snippets, please provide it here as well.
+    Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
+    Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
+    -->
+
+## Environment info
+<!-- You can run the command `transformers-cli env` and copy-and-paste its output below.
+     Don't forget to fill out the missing fields in that output! -->
+ 
+- `transformers` version:
+- Platform:
+- Python version:
+- PyTorch version (GPU?):
+- Tensorflow version (GPU?):
+- Using GPU in script?:
+- Using distributed or parallel set-up in script?:
+
+<!-- IMPORTANT: which version of the former library do you use? -->
+* `pytorch-transformers` or `pytorch-pretrained-bert` version (or branch):
+
+
+## Checklist
+
+- [ ] I have read the migration guide in the readme.
+ ([pytorch-transformers](https://github.com/huggingface/transformers#migrating-from-pytorch-transformers-to-transformers);
+  [pytorch-pretrained-bert](https://github.com/huggingface/transformers#migrating-from-pytorch-pretrained-bert-to-transformers))
+- [ ] I checked if a related official extension example runs on my machine.
--- a/.github/ISSUE_TEMPLATE/migration.yml
+++ b/.github/ISSUE_TEMPLATE/migration.yml
@ -1,72 +0,0 @@
-name: "\U0001F4DA Migration from pytorch-pretrained-bert or pytorch-transformers"
-description: Report a problem when migrating from pytorch-pretrained-bert or pytorch-transformers to transformers
-labels: [ "migration" ]
-body:
-  - type: textarea
-    id: system-info
-    attributes:
-      label: System Info
-      description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
-      render: shell
-      placeholder: transformers version, platform, python version, ...
-    validations:
-      required: true
-
-  - type: checkboxes
-    id: information-scripts-examples
-    attributes:
-      label: Information
-      description: 'The problem arises when using:'
-      options:
-        - label: "The official example scripts"
-        - label: "My own modified scripts"
-
-  - type: checkboxes
-    id: information-tasks
-    attributes:
-      label: Tasks
-      description: "The tasks I am working on are:"
-      options:
-        - label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)"
-        - label: "My own task or dataset (give details below)"
-
-  - type: textarea
-    id: reproduction
-    validations:
-      required: true
-    attributes:
-      label: Reproduction
-      description: |
-        Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
-        If you have code snippets, error messages, stack traces please provide them here as well.
-        Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
-        Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
-
-      placeholder: |
-        Steps to reproduce the behavior:
-          
-          1.
-          2.
-          3.
-          
-
-  - type: textarea
-    id: expected-behavior
-    validations:
-      required: true
-    attributes:
-      label: Expected behavior
-      description: "A clear and concise description of what you would expect to happen."
-      render: shell
-
-  - type: checkboxes
-    id: checklist
-    attributes:
-      label: Checklist
-      options:
-        - label: "I have read the migration guide in the readme.
- ([pytorch-transformers](https://github.com/huggingface/transformers#migrating-from-pytorch-transformers-to-transformers);
-  [pytorch-pretrained-bert](https://github.com/huggingface/transformers#migrating-from-pytorch-pretrained-bert-to-transformers))"
-          required: true
-        - label: "I checked if a related official extension example runs on my machine."
-          required: true
--- a/.github/ISSUE_TEMPLATE/new-model-addition.yml
+++ b/.github/ISSUE_TEMPLATE/new-model-addition.yml
@ -1,31 +0,0 @@
-name: "\U0001F31F New model addition"
-description: Submit a proposal/request to implement a new model
-labels: [ "New model" ]
-
-body:
-  - type: textarea
-    id: description-request
-    validations:
-      required: true
-    attributes:
-      label: Model description
-      description: |
-        Put any and all important information relative to the model
-
-  - type: checkboxes
-    id: information-tasks
-    attributes:
-      label: Open source status
-      description: |
-          Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `transformers`.
-      options:
-        - label: "The model implementation is available"
-        - label: "The model weights are available"
-
-  - type: textarea
-    id: additional-info
-    attributes:
-      label: Provide useful links for the implementation
-      description: |
-        Please provide information regarding the implementation, the weights, and the authors.
-        Please mention the authors by @gh-username if you're aware of their usernames.
--- a/.github/ISSUE_TEMPLATE/question-help.md
+++ b/.github/ISSUE_TEMPLATE/question-help.md
@ -0,0 +1,26 @@
+---
+name: "❓ Questions & Help"
+about: "Post your general questions on the Hugging Face forum: https://discuss.huggingface.co/"
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+# ❓ Questions & Help
+
+<!-- The GitHub issue tracker is primarily intended for bugs, feature requests,
+     new models, benchmarks, and migration questions. For all other questions,
+     we direct you to the Hugging Face forum: https://discuss.huggingface.co/ .
+     -->
+
+## Details
+
+<!-- Description of your issue -->
+
+<!-- You should first ask your question on the forum, and only if
+     you didn't get an answer after a few days ask it here on GitHub. -->
+
+**A link to original question on the forum**:
+
+<!-- Your issue will be closed if you don't fill this part. -->
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -17,20 +17,20 @@ Fixes # (issue)

 ## Before submitting
 - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#create-a-pull-request),
+- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
 - [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
 - [ ] Did you make sure to update the documentation with your changes? Here are the
-      [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and
-      [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
+      [documentation guidelines](https://github.com/huggingface/transformers/tree/master/docs), and
+      [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/master/docs#writing-source-documentation).
 - [ ] Did you write any new necessary tests?


 ## Who can review?

 Anyone in the community is free to review the PR once the tests have passed. Feel free to tag
-members/contributors who may be interested in your PR.
+members/contributors which may be interested in your PR.

 <!-- Your PR will be replied to more quickly if you can figure out the right person to tag with @

@ -39,41 +39,36 @@ members/contributors who may be interested in your PR.

 Models:

- text models: @ArthurZucker
- vision models: @amyeroberts, @qubvel
- speech models: @eustlb
- graph models: @clefourrier
+- albert, bert, xlm: @LysandreJik
+- blenderbot, bart, marian, pegasus, encoderdecoder,  t5: @patrickvonplaten, @patil-suraj
+- longformer, reformer, transfoxl, xlnet: @patrickvonplaten
+- fsmt: @stas00
+- funnel: @sgugger
+- gpt2: @patrickvonplaten, @LysandreJik
+- rag: @patrickvonplaten, @lhoestq
+- tensorflow: @jplu

 Library:

- flax: @gante and @Rocketknight1
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- pipelines: @Rocketknight1
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- trainer: @zach-huggingface and @SunMarc
- chat templates: @Rocketknight1
-
-Integrations:
-
- deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface
+- benchmarks: @patrickvonplaten
+- deepspeed: @stas00
 - ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
+- text generation: @patrickvonplaten
+- tokenizers: @n1t0, @LysandreJik
+- trainer: @sgugger
+- pipelines: @LysandreJik

-Documentation: @stevhliu
+Documentation: @sgugger

 HF projects:

- accelerate: [different repo](https://github.com/huggingface/accelerate)
- datasets: [different repo](https://github.com/huggingface/datasets)
- diffusers: [different repo](https://github.com/huggingface/diffusers)
+- nlp datasets: [different repo](https://github.com/huggingface/nlp)
 - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)

-Maintained examples (not research project or legacy):
+Examples:

- Flax: @Rocketknight1
- PyTorch: See Models above and tag the person corresponding to the modality of the example.
- TensorFlow: @Rocketknight1
+- maintained examples (not research project or legacy): @sgugger, @patil-suraj
+- research_projects/bert-loses-patience: @JetRunner
+- research_projects/distillation: @VictorSanh

 -->
--- a/.github/conda/meta.yaml
+++ b/.github/conda/meta.yaml
@ -14,9 +14,8 @@ requirements:
  host:
    - python
    - pip
-    - numpy >=1.17
+    - numpy
    - dataclasses
-    - huggingface_hub
    - packaging
    - filelock
    - requests
@ -24,15 +23,11 @@ requirements:
    - sacremoses
    - regex !=2019.12.17
    - protobuf
-    - tokenizers >=0.11.1,!=0.11.3,<0.13
-    - pyyaml >=5.1
-    - safetensors
-    - fsspec
+    - tokenizers ==0.9.4
  run:
    - python
-    - numpy >=1.17
+    - numpy
    - dataclasses
-    - huggingface_hub
    - packaging
    - filelock
    - requests
@ -40,10 +35,7 @@ requirements:
    - sacremoses
    - regex !=2019.12.17
    - protobuf
-    - tokenizers >=0.11.1,!=0.11.3,<0.13
-    - pyyaml >=5.1
-    - safetensors
-    - fsspec
+    - tokenizers ==0.9.4

 test:
  imports:
--- a/.github/scripts/assign_reviewers.py
+++ b/.github/scripts/assign_reviewers.py
@ -1,120 +0,0 @@
-# coding=utf-8
-# Copyright 2025 the HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import github
-import json
-from github import Github
-import re
-from collections import Counter
-from pathlib import Path
-
-def pattern_to_regex(pattern):
-    if pattern.startswith("/"):
-        start_anchor = True
-        pattern = re.escape(pattern[1:])
-    else:
-        start_anchor = False
-        pattern = re.escape(pattern)
-    # Replace `*` with "any number of non-slash characters"
-    pattern = pattern.replace(r"\*", "[^/]*")
-    if start_anchor:
-        pattern = r"^\/?" + pattern  # Allow an optional leading slash after the start of the string
-    return pattern
-
-def get_file_owners(file_path, codeowners_lines):
-    # Process lines in reverse (last matching pattern takes precedence)
-    for line in reversed(codeowners_lines):
-        # Skip comments and empty lines, strip inline comments
-        line = line.split('#')[0].strip()
-        if not line:
-            continue
-
-        # Split into pattern and owners
-        parts = line.split()
-        pattern = parts[0]
-        # Can be empty, e.g. for dummy files with explicitly no owner!
-        owners = [owner.removeprefix("@") for owner in parts[1:]]
-
-        # Check if file matches pattern
-        file_regex = pattern_to_regex(pattern)
-        if re.search(file_regex, file_path) is not None:
-            return owners  # Remember, can still be empty!
-    return []  # Should never happen, but just in case
-
-def pr_author_is_in_hf(pr_author, codeowners_lines):
-    # Check if the PR author is in the codeowners file
-    for line in codeowners_lines:
-        line = line.split('#')[0].strip()
-        if not line:
-            continue
-
-        # Split into pattern and owners
-        parts = line.split()
-        owners = [owner.removeprefix("@") for owner in parts[1:]]
-
-        if pr_author in owners:
-            return True
-    return False
-
-def main():
-    script_dir = Path(__file__).parent.absolute()
-    with open(script_dir / "codeowners_for_review_action") as f:
-        codeowners_lines = f.readlines()
-
-    g = Github(os.environ['GITHUB_TOKEN'])
-    repo = g.get_repo("huggingface/transformers")
-    with open(os.environ['GITHUB_EVENT_PATH']) as f:
-        event = json.load(f)
-
-    # The PR number is available in the event payload
-    pr_number = event['pull_request']['number']
-    pr = repo.get_pull(pr_number)
-    pr_author = pr.user.login
-    if pr_author_is_in_hf(pr_author, codeowners_lines):
-        print(f"PR author {pr_author} is in codeowners, skipping review request.")
-        return
-
-    existing_reviews = list(pr.get_reviews())
-    if existing_reviews:
-        print(f"Already has reviews: {[r.user.login for r in existing_reviews]}")
-        return
-
-    users_requested, teams_requested = pr.get_review_requests()
-    users_requested = list(users_requested)
-    if users_requested:
-        print(f"Reviewers already requested: {users_requested}")
-        return
-
-    locs_per_owner = Counter()
-    for file in pr.get_files():
-        owners = get_file_owners(file.filename, codeowners_lines)
-        for owner in owners:
-            locs_per_owner[owner] += file.changes
-
-    # Assign the top 2 based on locs changed as reviewers, but skip the owner if present
-    locs_per_owner.pop(pr_author, None)
-    top_owners = locs_per_owner.most_common(2)
-    print("Top owners", top_owners)
-    top_owners = [owner[0] for owner in top_owners]
-    try:
-        pr.create_review_request(top_owners)
-    except github.GithubException as e:
-        print(f"Failed to request review for {top_owners}: {e}")
-
-
-
-if __name__ == "__main__":
-    main()
--- a/.github/scripts/codeowners_for_review_action
+++ b/.github/scripts/codeowners_for_review_action
@ -1,370 +0,0 @@
-# Top-level rules are matched only if nothing else matches
-* @Rocketknight1 @ArthurZucker # if no one is pinged based on the other rules, he will do the dispatch
-*.md @stevhliu
-*tokenization* @ArthurZucker
-docs/ @stevhliu
-/benchmark/ @McPatate
-/docker/ @ydshieh @ArthurZucker
-
-# More high-level globs catch cases when specific rules later don't apply
-/src/transformers/models/*/processing* @molbap @yonigozlan @qubvel
-/src/transformers/models/*/image_processing* @qubvel
-/src/transformers/models/*/image_processing_*_fast* @yonigozlan
-
-# Owners of subsections of the library
-/src/transformers/generation/ @gante
-/src/transformers/pipeline/ @Rocketknight1 @yonigozlan
-/src/transformers/integrations/ @SunMarc @MekkCyber @zach-huggingface
-/src/transformers/quantizers/ @SunMarc @MekkCyber
-tests/ @ydshieh
-tests/generation/ @gante
-
-/src/transformers/models/auto/ @ArthurZucker
-/src/transformers/utils/ @ArthurZucker @Rocketknight1
-/src/transformers/loss/ @ArthurZucker
-/src/transformers/onnx/ @michaelbenayoun
-
-# Specific files come after the sections/globs, so they take priority
-/.circleci/config.yml @ArthurZucker @ydshieh
-/utils/tests_fetcher.py @ydshieh
-trainer.py @zach-huggingface @SunMarc
-trainer_utils.py @zach-huggingface @SunMarc
-/utils/modular_model_converter.py @Cyrilvallez @ArthurZucker
-
-# Owners of individual models are specific / high priority, and so they come last
-# mod* captures modeling and modular files
-
-# Text models
-/src/transformers/models/albert/mod*_albert* @ArthurZucker
-/src/transformers/models/bamba/mod*_bamba* @ArthurZucker
-/src/transformers/models/bart/mod*_bart* @ArthurZucker
-/src/transformers/models/barthez/mod*_barthez* @ArthurZucker
-/src/transformers/models/bartpho/mod*_bartpho* @ArthurZucker
-/src/transformers/models/bert/mod*_bert* @ArthurZucker
-/src/transformers/models/bert_generation/mod*_bert_generation* @ArthurZucker
-/src/transformers/models/bert_japanese/mod*_bert_japanese* @ArthurZucker
-/src/transformers/models/bertweet/mod*_bertweet* @ArthurZucker
-/src/transformers/models/big_bird/mod*_big_bird* @ArthurZucker
-/src/transformers/models/bigbird_pegasus/mod*_bigbird_pegasus* @ArthurZucker
-/src/transformers/models/biogpt/mod*_biogpt* @ArthurZucker
-/src/transformers/models/blenderbot/mod*_blenderbot* @ArthurZucker
-/src/transformers/models/blenderbot_small/mod*_blenderbot_small* @ArthurZucker
-/src/transformers/models/bloom/mod*_bloom* @ArthurZucker
-/src/transformers/models/bort/mod*_bort* @ArthurZucker
-/src/transformers/models/byt5/mod*_byt5* @ArthurZucker
-/src/transformers/models/camembert/mod*_camembert* @ArthurZucker
-/src/transformers/models/canine/mod*_canine* @ArthurZucker
-/src/transformers/models/codegen/mod*_codegen* @ArthurZucker
-/src/transformers/models/code_llama/mod*_code_llama* @ArthurZucker
-/src/transformers/models/cohere/mod*_cohere* @ArthurZucker
-/src/transformers/models/cohere2/mod*_cohere2* @ArthurZucker
-/src/transformers/models/convbert/mod*_convbert* @ArthurZucker
-/src/transformers/models/cpm/mod*_cpm* @ArthurZucker
-/src/transformers/models/cpmant/mod*_cpmant* @ArthurZucker
-/src/transformers/models/ctrl/mod*_ctrl* @ArthurZucker
-/src/transformers/models/dbrx/mod*_dbrx* @ArthurZucker
-/src/transformers/models/deberta/mod*_deberta* @ArthurZucker
-/src/transformers/models/deberta_v2/mod*_deberta_v2* @ArthurZucker
-/src/transformers/models/dialogpt/mod*_dialogpt* @ArthurZucker
-/src/transformers/models/diffllama/mod*_diffllama* @ArthurZucker
-/src/transformers/models/distilbert/mod*_distilbert* @ArthurZucker
-/src/transformers/models/dpr/mod*_dpr* @ArthurZucker
-/src/transformers/models/electra/mod*_electra* @ArthurZucker
-/src/transformers/models/encoder_decoder/mod*_encoder_decoder* @ArthurZucker
-/src/transformers/models/ernie/mod*_ernie* @ArthurZucker
-/src/transformers/models/ernie_m/mod*_ernie_m* @ArthurZucker
-/src/transformers/models/esm/mod*_esm* @ArthurZucker
-/src/transformers/models/falcon/mod*_falcon* @ArthurZucker
-/src/transformers/models/falcon3/mod*_falcon3* @ArthurZucker
-/src/transformers/models/falcon_mamba/mod*_falcon_mamba* @ArthurZucker
-/src/transformers/models/fastspeech2_conformer/mod*_fastspeech2_conformer* @ArthurZucker
-/src/transformers/models/flan_t5/mod*_flan_t5* @ArthurZucker
-/src/transformers/models/flan_ul2/mod*_flan_ul2* @ArthurZucker
-/src/transformers/models/flaubert/mod*_flaubert* @ArthurZucker
-/src/transformers/models/fnet/mod*_fnet* @ArthurZucker
-/src/transformers/models/fsmt/mod*_fsmt* @ArthurZucker
-/src/transformers/models/funnel/mod*_funnel* @ArthurZucker
-/src/transformers/models/fuyu/mod*_fuyu* @ArthurZucker
-/src/transformers/models/gemma/mod*_gemma* @ArthurZucker
-/src/transformers/models/gemma2/mod*_gemma2* @ArthurZucker
-/src/transformers/models/glm/mod*_glm* @ArthurZucker
-/src/transformers/models/openai_gpt/mod*_openai_gpt* @ArthurZucker
-/src/transformers/models/gpt_neo/mod*_gpt_neo* @ArthurZucker
-/src/transformers/models/gpt_neox/mod*_gpt_neox* @ArthurZucker
-/src/transformers/models/gpt_neox_japanese/mod*_gpt_neox_japanese* @ArthurZucker
-/src/transformers/models/gptj/mod*_gptj* @ArthurZucker
-/src/transformers/models/gpt2/mod*_gpt2* @ArthurZucker
-/src/transformers/models/gpt_bigcode/mod*_gpt_bigcode* @ArthurZucker
-/src/transformers/models/gptsan_japanese/mod*_gptsan_japanese* @ArthurZucker
-/src/transformers/models/gpt_sw3/mod*_gpt_sw3* @ArthurZucker
-/src/transformers/models/granite/mod*_granite* @ArthurZucker
-/src/transformers/models/granitemoe/mod*_granitemoe* @ArthurZucker
-/src/transformers/models/herbert/mod*_herbert* @ArthurZucker
-/src/transformers/models/ibert/mod*_ibert* @ArthurZucker
-/src/transformers/models/jamba/mod*_jamba* @ArthurZucker
-/src/transformers/models/jetmoe/mod*_jetmoe* @ArthurZucker
-/src/transformers/models/jukebox/mod*_jukebox* @ArthurZucker
-/src/transformers/models/led/mod*_led* @ArthurZucker
-/src/transformers/models/llama/mod*_llama* @ArthurZucker @Cyrilvallez
-/src/transformers/models/longformer/mod*_longformer* @ArthurZucker
-/src/transformers/models/longt5/mod*_longt5* @ArthurZucker
-/src/transformers/models/luke/mod*_luke* @ArthurZucker
-/src/transformers/models/m2m_100/mod*_m2m_100* @ArthurZucker
-/src/transformers/models/madlad_400/mod*_madlad_400* @ArthurZucker
-/src/transformers/models/mamba/mod*_mamba* @ArthurZucker
-/src/transformers/models/mamba2/mod*_mamba2* @ArthurZucker
-/src/transformers/models/marian/mod*_marian* @ArthurZucker
-/src/transformers/models/markuplm/mod*_markuplm* @ArthurZucker
-/src/transformers/models/mbart/mod*_mbart* @ArthurZucker
-/src/transformers/models/mega/mod*_mega* @ArthurZucker
-/src/transformers/models/megatron_bert/mod*_megatron_bert* @ArthurZucker
-/src/transformers/models/megatron_gpt2/mod*_megatron_gpt2* @ArthurZucker
-/src/transformers/models/mistral/mod*_mistral* @ArthurZucker
-/src/transformers/models/mixtral/mod*_mixtral* @ArthurZucker
-/src/transformers/models/mluke/mod*_mluke* @ArthurZucker
-/src/transformers/models/mobilebert/mod*_mobilebert* @ArthurZucker
-/src/transformers/models/modernbert/mod*_modernbert* @ArthurZucker
-/src/transformers/models/mpnet/mod*_mpnet* @ArthurZucker
-/src/transformers/models/mpt/mod*_mpt* @ArthurZucker
-/src/transformers/models/mra/mod*_mra* @ArthurZucker
-/src/transformers/models/mt5/mod*_mt5* @ArthurZucker
-/src/transformers/models/mvp/mod*_mvp* @ArthurZucker
-/src/transformers/models/myt5/mod*_myt5* @ArthurZucker
-/src/transformers/models/nemotron/mod*_nemotron* @ArthurZucker
-/src/transformers/models/nezha/mod*_nezha* @ArthurZucker
-/src/transformers/models/nllb/mod*_nllb* @ArthurZucker
-/src/transformers/models/nllb_moe/mod*_nllb_moe* @ArthurZucker
-/src/transformers/models/nystromformer/mod*_nystromformer* @ArthurZucker
-/src/transformers/models/olmo/mod*_olmo* @ArthurZucker
-/src/transformers/models/olmo2/mod*_olmo2* @ArthurZucker
-/src/transformers/models/olmoe/mod*_olmoe* @ArthurZucker
-/src/transformers/models/open_llama/mod*_open_llama* @ArthurZucker
-/src/transformers/models/opt/mod*_opt* @ArthurZucker
-/src/transformers/models/pegasus/mod*_pegasus* @ArthurZucker
-/src/transformers/models/pegasus_x/mod*_pegasus_x* @ArthurZucker
-/src/transformers/models/persimmon/mod*_persimmon* @ArthurZucker
-/src/transformers/models/phi/mod*_phi* @ArthurZucker
-/src/transformers/models/phi3/mod*_phi3* @ArthurZucker
-/src/transformers/models/phimoe/mod*_phimoe* @ArthurZucker
-/src/transformers/models/phobert/mod*_phobert* @ArthurZucker
-/src/transformers/models/plbart/mod*_plbart* @ArthurZucker
-/src/transformers/models/prophetnet/mod*_prophetnet* @ArthurZucker
-/src/transformers/models/qdqbert/mod*_qdqbert* @ArthurZucker
-/src/transformers/models/qwen2/mod*_qwen2* @ArthurZucker
-/src/transformers/models/qwen2_moe/mod*_qwen2_moe* @ArthurZucker
-/src/transformers/models/rag/mod*_rag* @ArthurZucker
-/src/transformers/models/realm/mod*_realm* @ArthurZucker
-/src/transformers/models/recurrent_gemma/mod*_recurrent_gemma* @ArthurZucker
-/src/transformers/models/reformer/mod*_reformer* @ArthurZucker
-/src/transformers/models/rembert/mod*_rembert* @ArthurZucker
-/src/transformers/models/retribert/mod*_retribert* @ArthurZucker
-/src/transformers/models/roberta/mod*_roberta* @ArthurZucker
-/src/transformers/models/roberta_prelayernorm/mod*_roberta_prelayernorm* @ArthurZucker
-/src/transformers/models/roc_bert/mod*_roc_bert* @ArthurZucker
-/src/transformers/models/roformer/mod*_roformer* @ArthurZucker
-/src/transformers/models/rwkv/mod*_rwkv* @ArthurZucker
-/src/transformers/models/splinter/mod*_splinter* @ArthurZucker
-/src/transformers/models/squeezebert/mod*_squeezebert* @ArthurZucker
-/src/transformers/models/stablelm/mod*_stablelm* @ArthurZucker
-/src/transformers/models/starcoder2/mod*_starcoder2* @ArthurZucker
-/src/transformers/models/switch_transformers/mod*_switch_transformers* @ArthurZucker
-/src/transformers/models/t5/mod*_t5* @ArthurZucker
-/src/transformers/models/t5v1.1/mod*_t5v1.1* @ArthurZucker
-/src/transformers/models/tapex/mod*_tapex* @ArthurZucker
-/src/transformers/models/transfo_xl/mod*_transfo_xl* @ArthurZucker
-/src/transformers/models/ul2/mod*_ul2* @ArthurZucker
-/src/transformers/models/umt5/mod*_umt5* @ArthurZucker
-/src/transformers/models/xmod/mod*_xmod* @ArthurZucker
-/src/transformers/models/xglm/mod*_xglm* @ArthurZucker
-/src/transformers/models/xlm/mod*_xlm* @ArthurZucker
-/src/transformers/models/xlm_prophetnet/mod*_xlm_prophetnet* @ArthurZucker
-/src/transformers/models/xlm_roberta/mod*_xlm_roberta* @ArthurZucker
-/src/transformers/models/xlm_roberta_xl/mod*_xlm_roberta_xl* @ArthurZucker
-/src/transformers/models/xlm_v/mod*_xlm_v* @ArthurZucker
-/src/transformers/models/xlnet/mod*_xlnet* @ArthurZucker
-/src/transformers/models/yoso/mod*_yoso* @ArthurZucker
-/src/transformers/models/zamba/mod*_zamba* @ArthurZucker
-
-# Vision models
-/src/transformers/models/beit/mod*_beit* @amyeroberts @qubvel
-/src/transformers/models/bit/mod*_bit* @amyeroberts @qubvel
-/src/transformers/models/conditional_detr/mod*_conditional_detr* @amyeroberts @qubvel
-/src/transformers/models/convnext/mod*_convnext* @amyeroberts @qubvel
-/src/transformers/models/convnextv2/mod*_convnextv2* @amyeroberts @qubvel
-/src/transformers/models/cvt/mod*_cvt* @amyeroberts @qubvel
-/src/transformers/models/deformable_detr/mod*_deformable_detr* @amyeroberts @qubvel
-/src/transformers/models/deit/mod*_deit* @amyeroberts @qubvel
-/src/transformers/models/depth_anything/mod*_depth_anything* @amyeroberts @qubvel
-/src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @amyeroberts @qubvel
-/src/transformers/models/deta/mod*_deta* @amyeroberts @qubvel
-/src/transformers/models/detr/mod*_detr* @amyeroberts @qubvel
-/src/transformers/models/dinat/mod*_dinat* @amyeroberts @qubvel
-/src/transformers/models/dinov2/mod*_dinov2* @amyeroberts @qubvel
-/src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @amyeroberts @qubvel
-/src/transformers/models/dit/mod*_dit* @amyeroberts @qubvel
-/src/transformers/models/dpt/mod*_dpt* @amyeroberts @qubvel
-/src/transformers/models/efficientformer/mod*_efficientformer* @amyeroberts @qubvel
-/src/transformers/models/efficientnet/mod*_efficientnet* @amyeroberts @qubvel
-/src/transformers/models/focalnet/mod*_focalnet* @amyeroberts @qubvel
-/src/transformers/models/glpn/mod*_glpn* @amyeroberts @qubvel
-/src/transformers/models/hiera/mod*_hiera* @amyeroberts @qubvel
-/src/transformers/models/ijepa/mod*_ijepa* @amyeroberts @qubvel
-/src/transformers/models/imagegpt/mod*_imagegpt* @amyeroberts @qubvel
-/src/transformers/models/levit/mod*_levit* @amyeroberts @qubvel
-/src/transformers/models/mask2former/mod*_mask2former* @amyeroberts @qubvel
-/src/transformers/models/maskformer/mod*_maskformer* @amyeroberts @qubvel
-/src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @amyeroberts @qubvel
-/src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @amyeroberts @qubvel
-/src/transformers/models/mobilevit/mod*_mobilevit* @amyeroberts @qubvel
-/src/transformers/models/mobilevitv2/mod*_mobilevitv2* @amyeroberts @qubvel
-/src/transformers/models/nat/mod*_nat* @amyeroberts @qubvel
-/src/transformers/models/poolformer/mod*_poolformer* @amyeroberts @qubvel
-/src/transformers/models/pvt/mod*_pvt* @amyeroberts @qubvel
-/src/transformers/models/pvt_v2/mod*_pvt_v2* @amyeroberts @qubvel
-/src/transformers/models/regnet/mod*_regnet* @amyeroberts @qubvel
-/src/transformers/models/resnet/mod*_resnet* @amyeroberts @qubvel
-/src/transformers/models/rt_detr/mod*_rt_detr* @amyeroberts @qubvel
-/src/transformers/models/segformer/mod*_segformer* @amyeroberts @qubvel
-/src/transformers/models/seggpt/mod*_seggpt* @amyeroberts @qubvel
-/src/transformers/models/superpoint/mod*_superpoint* @amyeroberts @qubvel
-/src/transformers/models/swiftformer/mod*_swiftformer* @amyeroberts @qubvel
-/src/transformers/models/swin/mod*_swin* @amyeroberts @qubvel
-/src/transformers/models/swinv2/mod*_swinv2* @amyeroberts @qubvel
-/src/transformers/models/swin2sr/mod*_swin2sr* @amyeroberts @qubvel
-/src/transformers/models/table_transformer/mod*_table_transformer* @amyeroberts @qubvel
-/src/transformers/models/textnet/mod*_textnet* @amyeroberts @qubvel
-/src/transformers/models/timm_wrapper/mod*_timm_wrapper* @amyeroberts @qubvel
-/src/transformers/models/upernet/mod*_upernet* @amyeroberts @qubvel
-/src/transformers/models/van/mod*_van* @amyeroberts @qubvel
-/src/transformers/models/vit/mod*_vit* @amyeroberts @qubvel
-/src/transformers/models/vit_hybrid/mod*_vit_hybrid* @amyeroberts @qubvel
-/src/transformers/models/vitdet/mod*_vitdet* @amyeroberts @qubvel
-/src/transformers/models/vit_mae/mod*_vit_mae* @amyeroberts @qubvel
-/src/transformers/models/vitmatte/mod*_vitmatte* @amyeroberts @qubvel
-/src/transformers/models/vit_msn/mod*_vit_msn* @amyeroberts @qubvel
-/src/transformers/models/vitpose/mod*_vitpose* @amyeroberts @qubvel
-/src/transformers/models/yolos/mod*_yolos* @amyeroberts @qubvel
-/src/transformers/models/zoedepth/mod*_zoedepth* @amyeroberts @qubvel
-
-# Audio models
-/src/transformers/models/audio_spectrogram_transformer/mod*_audio_spectrogram_transformer* @eustlb
-/src/transformers/models/bark/mod*_bark* @eustlb
-/src/transformers/models/clap/mod*_clap* @eustlb
-/src/transformers/models/dac/mod*_dac* @eustlb
-/src/transformers/models/encodec/mod*_encodec* @eustlb
-/src/transformers/models/hubert/mod*_hubert* @eustlb
-/src/transformers/models/mctct/mod*_mctct* @eustlb
-/src/transformers/models/mimi/mod*_mimi* @eustlb
-/src/transformers/models/mms/mod*_mms* @eustlb
-/src/transformers/models/moshi/mod*_moshi* @eustlb
-/src/transformers/models/musicgen/mod*_musicgen* @eustlb
-/src/transformers/models/musicgen_melody/mod*_musicgen_melody* @eustlb
-/src/transformers/models/pop2piano/mod*_pop2piano* @eustlb
-/src/transformers/models/seamless_m4t/mod*_seamless_m4t* @eustlb
-/src/transformers/models/seamless_m4t_v2/mod*_seamless_m4t_v2* @eustlb
-/src/transformers/models/sew/mod*_sew* @eustlb
-/src/transformers/models/sew_d/mod*_sew_d* @eustlb
-/src/transformers/models/speech_to_text/mod*_speech_to_text* @eustlb
-/src/transformers/models/speech_to_text_2/mod*_speech_to_text_2* @eustlb
-/src/transformers/models/speecht5/mod*_speecht5* @eustlb
-/src/transformers/models/unispeech/mod*_unispeech* @eustlb
-/src/transformers/models/unispeech_sat/mod*_unispeech_sat* @eustlb
-/src/transformers/models/univnet/mod*_univnet* @eustlb
-/src/transformers/models/vits/mod*_vits* @eustlb
-/src/transformers/models/wav2vec2/mod*_wav2vec2* @eustlb
-/src/transformers/models/wav2vec2_bert/mod*_wav2vec2_bert* @eustlb
-/src/transformers/models/wav2vec2_conformer/mod*_wav2vec2_conformer* @eustlb
-/src/transformers/models/wav2vec2_phoneme/mod*_wav2vec2_phoneme* @eustlb
-/src/transformers/models/wavlm/mod*_wavlm* @eustlb
-/src/transformers/models/whisper/mod*_whisper* @eustlb
-/src/transformers/models/xls_r/mod*_xls_r* @eustlb
-/src/transformers/models/xlsr_wav2vec2/mod*_xlsr_wav2vec2* @eustlb
-
-# Video models
-/src/transformers/models/timesformer/mod*_timesformer* @Rocketknight1
-/src/transformers/models/videomae/mod*_videomae* @Rocketknight1
-/src/transformers/models/vivit/mod*_vivit* @Rocketknight1
-
-# Multimodal models
-/src/transformers/models/align/mod*_align* @zucchini-nlp
-/src/transformers/models/altclip/mod*_altclip* @zucchini-nlp
-/src/transformers/models/aria/mod*_aria* @zucchini-nlp
-/src/transformers/models/blip/mod*_blip* @zucchini-nlp
-/src/transformers/models/blip_2/mod*_blip_2* @zucchini-nlp
-/src/transformers/models/bridgetower/mod*_bridgetower* @zucchini-nlp
-/src/transformers/models/bros/mod*_bros* @zucchini-nlp
-/src/transformers/models/chameleon/mod*_chameleon* @zucchini-nlp
-/src/transformers/models/chinese_clip/mod*_chinese_clip* @zucchini-nlp
-/src/transformers/models/clip/mod*_clip* @zucchini-nlp
-/src/transformers/models/clipseg/mod*_clipseg* @zucchini-nlp
-/src/transformers/models/clvp/mod*_clvp* @zucchini-nlp
-/src/transformers/models/colpali/mod*_colpali* @zucchini-nlp @yonigozlan
-/src/transformers/models/data2vec/mod*_data2vec* @zucchini-nlp
-/src/transformers/models/deplot/mod*_deplot* @zucchini-nlp
-/src/transformers/models/donut/mod*_donut* @zucchini-nlp
-/src/transformers/models/flava/mod*_flava* @zucchini-nlp
-/src/transformers/models/git/mod*_git* @zucchini-nlp
-/src/transformers/models/grounding_dino/mod*_grounding_dino* @qubvel
-/src/transformers/models/groupvit/mod*_groupvit* @zucchini-nlp
-/src/transformers/models/idefics/mod*_idefics* @zucchini-nlp
-/src/transformers/models/idefics2/mod*_idefics2* @zucchini-nlp
-/src/transformers/models/idefics3/mod*_idefics3* @zucchini-nlp
-/src/transformers/models/instructblip/mod*_instructblip* @zucchini-nlp
-/src/transformers/models/instructblipvideo/mod*_instructblipvideo* @zucchini-nlp
-/src/transformers/models/kosmos_2/mod*_kosmos_2* @zucchini-nlp
-/src/transformers/models/layoutlm/mod*_layoutlm* @NielsRogge
-/src/transformers/models/layoutlmv2/mod*_layoutlmv2* @NielsRogge
-/src/transformers/models/layoutlmv3/mod*_layoutlmv3* @NielsRogge
-/src/transformers/models/layoutxlm/mod*_layoutxlm* @NielsRogge
-/src/transformers/models/lilt/mod*_lilt* @zucchini-nlp
-/src/transformers/models/llava/mod*_llava* @zucchini-nlp @arthurzucker
-/src/transformers/models/llava_next/mod*_llava_next* @zucchini-nlp
-/src/transformers/models/llava_next_video/mod*_llava_next_video* @zucchini-nlp
-/src/transformers/models/llava_onevision/mod*_llava_onevision* @zucchini-nlp
-/src/transformers/models/lxmert/mod*_lxmert* @zucchini-nlp
-/src/transformers/models/matcha/mod*_matcha* @zucchini-nlp
-/src/transformers/models/mgp_str/mod*_mgp_str* @zucchini-nlp
-/src/transformers/models/mllama/mod*_mllama* @zucchini-nlp
-/src/transformers/models/nougat/mod*_nougat* @NielsRogge
-/src/transformers/models/omdet_turbo/mod*_omdet_turbo* @qubvel @yonigozlan
-/src/transformers/models/oneformer/mod*_oneformer* @zucchini-nlp
-/src/transformers/models/owlvit/mod*_owlvit* @qubvel
-/src/transformers/models/owlv2/mod*_owlv2* @qubvel
-/src/transformers/models/paligemma/mod*_paligemma* @zucchini-nlp @molbap
-/src/transformers/models/perceiver/mod*_perceiver* @zucchini-nlp
-/src/transformers/models/pix2struct/mod*_pix2struct* @zucchini-nlp
-/src/transformers/models/pixtral/mod*_pixtral* @zucchini-nlp @ArthurZucker
-/src/transformers/models/qwen2_audio/mod*_qwen2_audio* @zucchini-nlp @ArthurZucker
-/src/transformers/models/qwen2_vl/mod*_qwen2_vl* @zucchini-nlp @ArthurZucker
-/src/transformers/models/sam/mod*_sam* @zucchini-nlp @ArthurZucker
-/src/transformers/models/siglip/mod*_siglip* @zucchini-nlp
-/src/transformers/models/speech_encoder_decoder/mod*_speech_encoder_decoder* @zucchini-nlp
-/src/transformers/models/tapas/mod*_tapas* @NielsRogge
-/src/transformers/models/trocr/mod*_trocr* @zucchini-nlp
-/src/transformers/models/tvlt/mod*_tvlt* @zucchini-nlp
-/src/transformers/models/tvp/mod*_tvp* @zucchini-nlp
-/src/transformers/models/udop/mod*_udop* @zucchini-nlp
-/src/transformers/models/video_llava/mod*_video_llava* @zucchini-nlp
-/src/transformers/models/vilt/mod*_vilt* @zucchini-nlp
-/src/transformers/models/vipllava/mod*_vipllava* @zucchini-nlp
-/src/transformers/models/vision_encoder_decoder/mod*_vision_encoder_decoder* @Rocketknight1
-/src/transformers/models/vision_text_dual_encoder/mod*_vision_text_dual_encoder* @Rocketknight1
-/src/transformers/models/visual_bert/mod*_visual_bert* @zucchini-nlp
-/src/transformers/models/xclip/mod*_xclip* @zucchini-nlp
-
-# Reinforcement learning models
-/src/transformers/models/decision_transformer/mod*_decision_transformer* @Rocketknight1
-/src/transformers/models/trajectory_transformer/mod*_trajectory_transformer* @Rocketknight1
-
-# Time series models
-/src/transformers/models/autoformer/mod*_autoformer* @Rocketknight1
-/src/transformers/models/informer/mod*_informer* @Rocketknight1
-/src/transformers/models/patchtsmixer/mod*_patchtsmixer* @Rocketknight1
-/src/transformers/models/patchtst/mod*_patchtst* @Rocketknight1
-/src/transformers/models/time_series_transformer/mod*_time_series_transformer* @Rocketknight1
-
-# Graph models
-/src/transformers/models/graphormer/mod*_graphormer* @clefourrier
-
-# Finally, files with no owners that shouldn't generate pings; these are usually automatically generated and checked in the CI
-utils/dummy*
--- a/.github/stale.yml
+++ b/.github/stale.yml
@ -0,0 +1,18 @@
+# Number of days of inactivity before an issue becomes stale
+daysUntilStale: 60
+# Number of days of inactivity before a stale issue is closed
+daysUntilClose: 7
+# Issues with these labels will never be considered stale
+exemptLabels:
+  - pinned
+  - security
+  - Feature request
+# Label to use when marking an issue as stale
+staleLabel: wontfix
+# Comment to post when marking an issue as stale. Set to `false` to disable
+markComment: >
+  This issue has been automatically marked as stale because it has not had
+  recent activity. It will be closed if no further activity occurs. Thank you
+  for your contributions.
+# Comment to post when closing a stale issue. Set to `false` to disable
+closeComment: false
--- a/.github/workflows/TROUBLESHOOT.md
+++ b/.github/workflows/TROUBLESHOOT.md
@ -1,9 +0,0 @@
-# Troubleshooting
-
-This document explains how to deal with various issues on the GitHub Actions self-hosted CI. Entries may include actual solutions or pointers to issues that cover them.
-
-## GitHub Actions (self-hosted CI)
-
-* Deepspeed
-
-  - If the JIT build hangs, clear out the extensions cache with `rm -rf ~/.cache/torch_extensions/`. Reference: https://github.com/huggingface/transformers/pull/12723
--- a/.github/workflows/add-model-like.yml
+++ b/.github/workflows/add-model-like.yml
@ -1,80 +0,0 @@
-name: Add model like runner
-
-on:
-  push:
-    branches:
-      - none # put main here when this is fixed
-  #pull_request:
-  #  paths:
-  #    - "src/**"
-  #    - "tests/**"
-  #    - ".github/**"
-  #  types: [opened, synchronize, reopened]
-
-jobs:
-  run_tests_templates_like:
-    name: "Add new model like template tests"
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install dependencies
-        run: |
-          sudo apt -y update && sudo apt install -y libsndfile1-dev
-
-      - name: Load cached virtual environment
-        uses: actions/cache@v4
-        id: cache
-        with:
-          path: ~/venv/
-          key: v4-tests_model_like-${{ hashFiles('setup.py') }}
-
-      - name: Create virtual environment on cache miss
-        if: steps.cache.outputs.cache-hit != 'true'
-        run: |
-          python -m venv ~/venv && . ~/venv/bin/activate
-          pip install --upgrade pip!=21.3
-          pip install -e .[dev]
-
-      - name: Check transformers location
-        # make `transformers` available as package (required since we use `-e` flag) and check it's indeed from the repo.
-        run: |
-          . ~/venv/bin/activate
-          python setup.py develop
-          transformers_install=$(pip list -e | grep transformers)
-          transformers_install_array=($transformers_install)
-          transformers_loc=${transformers_install_array[-1]}
-          transformers_repo_loc=$(pwd .)
-          if [ "$transformers_loc" != "$transformers_repo_loc" ]; then
-              echo "transformers is from $transformers_loc but it shoud be from $transformers_repo_loc/src."
-              echo "A fix is required. Stop testing."
-              exit 1
-          fi
-
-      - name: Create model files
-        run: |
-          . ~/venv/bin/activate
-          transformers add-new-model-like --config_file tests/fixtures/add_distilbert_like_config.json --path_to_repo .
-          make style
-          make fix-copies
-
-      - name: Run all PyTorch modeling test
-        run: |
-          . ~/venv/bin/activate
-          python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_new_models tests/bert_new/test_modeling_bert_new.py
-
-      - name: Run style changes
-        run: |
-          . ~/venv/bin/activate
-          make style && make quality && make repo-consistency
-
-      - name: Failure short reports
-        if: ${{ always() }}
-        run: cat reports/tests_new_models/failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: run_all_tests_new_models_test_reports
-          path: reports/tests_new_models
--- a/.github/workflows/assign-reviewers.yml
+++ b/.github/workflows/assign-reviewers.yml
@ -1,26 +0,0 @@
-name: Assign PR Reviewers
-on:
-  pull_request_target:
-    branches:
-      - main
-    types: [ready_for_review]
-
-jobs:
-  assign_reviewers:
-    permissions:
-       pull-requests: write
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.13'
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install PyGithub
-      - name: Run assignment script
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: python .github/scripts/assign_reviewers.py
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@ -1,76 +0,0 @@
-name: Self-hosted runner (benchmark)
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    types: [ opened, labeled, reopened, synchronize ]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-env:
-  HF_HOME: /mnt/cache
-
-jobs:
-  benchmark:
-    name: Benchmark
-    strategy:
-      matrix:
-        # group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled)
-        group: [aws-g5-4xlarge-cache]
-    runs-on:
-      group: ${{ matrix.group }}
-    if: |
-      (github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
-      (github.event_name == 'push' && github.ref == 'refs/heads/main')
-    container:
-      image: huggingface/transformers-pytorch-gpu
-      options: --gpus all --privileged --ipc host
-    steps:
-      - name: Get repo
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.head.sha || github.sha }}
-
-      - name: Install libpq-dev & psql
-        run: |
-          apt update
-          apt install -y libpq-dev postgresql-client
-
-      - name: Install benchmark script dependencies
-        run: python3 -m pip install -r benchmark/requirements.txt
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]"
-
-      - name: Run database init script
-        run: |
-          psql -f benchmark/init_db.sql
-        env:
-          PGDATABASE: metrics
-          PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
-          PGUSER: transformers_benchmarks
-          PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
-
-      - name: Run benchmark
-        run: |
-          git config --global --add safe.directory /__w/transformers/transformers
-          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
-            commit_id=$(echo "${{ github.event.pull_request.head.sha }}")
-          elif [ "$GITHUB_EVENT_NAME" = "push" ]; then
-            commit_id=$GITHUB_SHA
-          fi
-          commit_msg=$(git show -s --format=%s | cut -c1-70)
-          python3 benchmark/benchmarks_entrypoint.py "huggingface/transformers" "$BRANCH_NAME" "$commit_id" "$commit_msg"
-        env:
-          HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-          # Enable this to see debug logs
-          # HF_HUB_VERBOSITY: debug
-          # TRANSFORMERS_VERBOSITY: debug
-          PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
-          PGUSER: transformers_benchmarks
-          PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
-          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
--- a/.github/workflows/build-ci-docker-images.yml
+++ b/.github/workflows/build-ci-docker-images.yml
@ -1,77 +0,0 @@
-name: Build pr ci-docker
-
-on:
-  push:
-    branches:
-      - push-ci-image # for now let's only build on this branch
-  repository_dispatch:
-  workflow_call:
-    inputs:
-      image_postfix:
-        required: true
-        type: string
-  schedule:
-    - cron: "6 0 * * *"
-
-
-concurrency:
-  group: ${{ github.workflow }}
-  cancel-in-progress: true
-
-jobs:
-  build:
-    runs-on: ubuntu-22.04
-
-    if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
-
-    strategy:
-      matrix:
-        file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "jax-light", "examples-torch",  "examples-tf"]
-    continue-on-error: true
-
-    steps:
-      -
-        name: Set tag
-        run: |
-              if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
-                  echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV"
-                  echo "setting it to DEV!"
-              else
-                  echo "TAG=huggingface/transformers-${{ matrix.file }}" >> "$GITHUB_ENV"
-
-              fi
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build ${{ matrix.file }}.dockerfile
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker
-          build-args: |
-            REF=${{ github.sha }}
-          file: "./docker/${{ matrix.file }}.dockerfile"
-          push: ${{ contains(github.event.head_commit.message, 'ci-image]') ||  github.event_name == 'schedule' }}
-          tags: ${{ env.TAG }}
-
-  notify:
-    runs-on: ubuntu-22.04
-    if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
-    steps:
-      - name: Post to Slack
-        if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }}
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: "#transformers-ci-circleci-images"
-          title: 🤗 New docker images for CircleCI are pushed.
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@ -1,355 +0,0 @@
-name: Build docker images (scheduled)
-
-on:
-  push:
-    branches:
-      - build_ci_docker_image*
-  repository_dispatch:
-  workflow_call:
-    inputs:
-      image_postfix:
-        required: true
-        type: string
-  schedule:
-    - cron: "17 0 * * *"
-
-concurrency:
-  group: docker-images-builds
-  cancel-in-progress: false
-
-jobs:
-  latest-docker:
-    name: "Latest PyTorch [dev]"
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-all-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-all-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-all-latest-gpu-push-ci
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
-  latest-torch-deepspeed-docker:
-    name: "Latest PyTorch + DeepSpeed"
-    runs-on:
-      group: aws-g4dn-2xlarge-cache
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
-  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
-  latest-torch-deepspeed-docker-for-push-ci-daily-build:
-    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
-  doc-builder:
-    name: "Doc builder"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-doc-builder
-          push: true
-          tags: huggingface/transformers-doc-builder
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-doc-builder docker build
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
-  latest-pytorch:
-    name: "Latest PyTorch [dev]"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-gpu
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
-  latest-pytorch-amd:
-    name: "Latest PyTorch (AMD) [dev]"
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-amd-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-amd-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-amd-gpu-push-ci
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
-  latest-pytorch-deepspeed-amd:
-    name: "PyTorch + DeepSpeed (AMD) [dev]"
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
-  latest-quantization-torch-docker:
-    name: "Latest Pytorch + Quantization [dev]"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-quantization-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-quantization-latest-gpu build
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/build-nightly-ci-docker-images.yml
+++ b/.github/workflows/build-nightly-ci-docker-images.yml
@ -1,67 +0,0 @@
-name: Build docker images (Nightly CI)
-
-on:
-  workflow_call:
-  push:
-    branches:
-      - build_nightly_ci_docker_image*
-
-concurrency:
-  group: docker-images-builds
-  cancel-in-progress: false
-
-jobs:
-  latest-with-torch-nightly-docker:
-    # Builds ./docker/transformers-all-latest-gpu with the PYTORCH=pre build arg
-    # and pushes the result under the torch-nightly tag.
-    name: "Nightly PyTorch + Stable TensorFlow"
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Check out code
-        uses: actions/checkout@v4
-
-      - name: Login to DockerHub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v3
-        with:
-          context: ./docker/transformers-all-latest-gpu
-          build-args: |
-            REF=main
-            PYTORCH=pre
-          push: true
-          tags: huggingface/transformers-all-latest-torch-nightly-gpu
-
-  nightly-torch-deepspeed-docker:
-    # Builds ./docker/transformers-pytorch-deepspeed-nightly-gpu and pushes it
-    # under the tag of the same name.
-    name: "Nightly PyTorch + DeepSpeed"
-    runs-on:
-      group: aws-g4dn-2xlarge-cache
-    steps:
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Check out code
-        uses: actions/checkout@v4
-
-      - name: Login to DockerHub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v3
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-nightly-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu
--- a/.github/workflows/build-past-ci-docker-images.yml
+++ b/.github/workflows/build-past-ci-docker-images.yml
@ -1,101 +0,0 @@
-name: Build docker images (Past CI)
-
-on:
-  push:
-    branches:
-      - build_past_ci_docker_image*
-
-concurrency:
-  group: docker-images-builds
-  cancel-in-progress: false
-
-jobs:
-  past-pytorch-docker:
-    # One image per PyTorch version in the matrix, all built from
-    # ./docker/transformers-past-gpu with a version-specific base image.
-    name: "Past PyTorch Docker"
-    strategy:
-      fail-fast: false
-      matrix:
-        version:
-          - "1.13"
-          - "1.12"
-          - "1.11"
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Check out code
-        uses: actions/checkout@v4
-
-      # Reads the base image for this matrix version from `past_versions_testing`
-      # (imported from `utils.past_ci_versions`) and exposes it as the step output `base_image`.
-      - name: Get Base Image
-        id: get-base-image
-        env:
-          framework_version: ${{ matrix.version }}
-        run: |
-          echo "base_image=$(python3 -c 'import os; from utils.past_ci_versions import past_versions_testing; base_image = past_versions_testing["pytorch"][os.environ["framework_version"]]["base_image"]; print(base_image)')" >> $GITHUB_OUTPUT
-
-      - name: Print Base Image
-        run: |
-          echo ${{ steps.get-base-image.outputs.base_image }}
-
-      - name: Login to DockerHub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v3
-        with:
-          context: ./docker/transformers-past-gpu
-          build-args: |
-            REF=main
-            BASE_DOCKER_IMAGE=${{ steps.get-base-image.outputs.base_image }}
-            FRAMEWORK=pytorch
-            VERSION=${{ matrix.version }}
-          push: true
-          tags: huggingface/transformers-pytorch-past-${{ matrix.version }}-gpu
-
-  past-tensorflow-docker:
-    # One image per TensorFlow version in the matrix, all built from
-    # ./docker/transformers-past-gpu with a version-specific base image.
-    name: "Past TensorFlow Docker"
-    strategy:
-      fail-fast: false
-      matrix:
-        version:
-          - "2.11"
-          - "2.10"
-          - "2.9"
-          - "2.8"
-          - "2.7"
-          - "2.6"
-          - "2.5"
-    runs-on:
-      group: aws-general-8-plus
-    steps:
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Check out code
-        uses: actions/checkout@v4
-
-      # Reads the base image for this matrix version from `past_versions_testing`
-      # (imported from `utils.past_ci_versions`) and exposes it as the step output `base_image`.
-      - name: Get Base Image
-        id: get-base-image
-        env:
-          framework_version: ${{ matrix.version }}
-        run: |
-          echo "base_image=$(python3 -c 'import os; from utils.past_ci_versions import past_versions_testing; base_image = past_versions_testing["tensorflow"][os.environ["framework_version"]]["base_image"]; print(base_image)')" >> $GITHUB_OUTPUT
-
-      - name: Print Base Image
-        run: |
-          echo ${{ steps.get-base-image.outputs.base_image }}
-
-      - name: Login to DockerHub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v3
-        with:
-          context: ./docker/transformers-past-gpu
-          build-args: |
-            REF=main
-            BASE_DOCKER_IMAGE=${{ steps.get-base-image.outputs.base_image }}
-            FRAMEWORK=tensorflow
-            VERSION=${{ matrix.version }}
-          push: true
-          tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu
--- a/.github/workflows/build_documentation.yml
+++ b/.github/workflows/build_documentation.yml
@ -1,23 +0,0 @@
-name: Build documentation
-
-on:
-  workflow_dispatch:
-  push:
-    branches:
-      - main
-      - doc-builder*
-      - v*-release
-      - use_templates
-
-jobs:
-  # Hands the documentation build for the current commit to the reusable
-  # doc-builder workflow.
-  build:
-    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
-    with:
-      commit_sha: ${{ github.sha }}
-      package: transformers
-      notebook_folder: transformers_doc
-      languages: ar de en es fr hi it ko pt tr zh ja te
-      custom_container: huggingface/transformers-doc-builder
-    secrets:
-      token: ${{ secrets.HUGGINGFACE_PUSH }}
-      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@ -1,26 +0,0 @@
-name: Build PR Documentation
-
-on:
-  pull_request:
-  workflow_call:
-    inputs:
-      pr_number:
-        type: string
-        required: true
-      commit_sha:
-        type: string
-        required: true
-
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  # Delegates the PR documentation build to the reusable doc-builder workflow,
-  # which is referenced by a full commit SHA rather than a branch name.
-  build:
-    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@6e2eb04a2604817c97be03786efa494fe3acae90
-    with:
-      # Use the `workflow_call` inputs when provided; otherwise fall back to the
-      # values carried by the `pull_request` event.
-      commit_sha: ${{ inputs.commit_sha || github.event.pull_request.head.sha }}
-      pr_number: ${{ inputs.pr_number || github.event.number }}
-      package: transformers
-      languages: en
--- a/.github/workflows/check_failed_tests.yml
+++ b/.github/workflows/check_failed_tests.yml
@ -1,205 +0,0 @@
-name: Process failed tests
-
-on:
-  workflow_call:
-    inputs:
-      docker:
-        required: true
-        type: string
-      start_sha:
-        required: true
-        type: string
-      job:
-        required: true
-        type: string
-      slack_report_channel:
-        required: true
-        type: string
-      ci_event:
-        required: true
-        type: string
-      report_repo_id:
-        required: true
-        type: string
-
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes
-  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
-  # This token is created under the bot `hf-transformers-bot`.
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  CUDA_VISIBLE_DEVICES: 0,1
-
-
-jobs:
-  check_new_failures:
-    name: " "
-    runs-on:
-      group: aws-g4dn-4xlarge-cache
-    container:
-      image: ${{ inputs.docker }}
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - uses: actions/download-artifact@v4
-        with:
-          name: ci_results_${{ inputs.job }}
-          path: /transformers/ci_results_${{ inputs.job }}
-
-      # Sets `process` in $GITHUB_ENV: 'true' only when new_failures.json exists.
-      # The later steps of this job are gated on `env.process == 'true'`.
-      - name: Check file
-        working-directory: /transformers
-        run: |
-          if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
-            # Backticks are escaped: unescaped inside double quotes they are command substitution.
-            echo "\`ci_results_${{ inputs.job }}/new_failures.json\` exists, continue ..."
-            echo "process=true" >> $GITHUB_ENV
-          else
-            echo "\`ci_results_${{ inputs.job }}/new_failures.json\` doesn't exist, abort."
-            echo "process=false" >> $GITHUB_ENV
-          fi
-
-      - uses: actions/download-artifact@v4
-        if: ${{ env.process == 'true' }}
-        with:
-          pattern: setup_values*
-          path: setup_values
-          merge-multiple: true
-
-      - name: Prepare some setup values
-        if: ${{ env.process == 'true' }}
-        run: |
-          if [ -f setup_values/prev_workflow_run_id.txt ]; then
-            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
-          else
-            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
-          fi
-
-          if [ -f setup_values/other_workflow_run_id.txt ]; then
-            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
-          else
-            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
-          fi
-
-      - name: Update clone
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Get target commit
-        working-directory: /transformers/utils
-        if: ${{ env.process == 'true' }}
-        run: |
-          echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV
-
-      - name: Checkout to `start_sha`
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: git fetch && git checkout ${{ inputs.start_sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
-        if: ${{ env.process == 'true' }}
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: pip freeze
-
-      - name: Check failed tests
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit.json
-
-      - name: Show results
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: |
-          ls -l new_failures_with_bad_commit.json
-          cat new_failures_with_bad_commit.json
-
-      - name: Checkout back
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: |
-          git checkout ${{ inputs.start_sha }}
-
-      - name: Process report
-        shell: bash
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        env:
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
-          JOB_NAME: ${{ inputs.job }}
-          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
-        run: |
-          python3 utils/process_bad_commit_report.py
-
-      - name: Process report
-        shell: bash
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        env:
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
-          JOB_NAME: ${{ inputs.job }}
-          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
-        run: |
-          {
-            echo 'REPORT_TEXT<<EOF'
-            python3 utils/process_bad_commit_report.py
-            echo EOF
-          } >> "$GITHUB_ENV"
-
-      - name: Prepare Slack report title
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: |
-          pip install slack_sdk
-          echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
-
-      - name: Send processed report
-        if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
-        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
-        with:
-          # Slack channel id, channel name, or user id to post message.
-          # See also: https://api.slack.com/methods/chat.postMessage#channels
-          channel-id: '#${{ inputs.slack_report_channel }}'
-          # For posting a rich message using Block Kit
-          payload: |
-            {
-              "blocks": [
-                {
-                  "type": "header",
-                  "text": {
-                    "type": "plain_text",
-                    "text": "${{ env.title }}"
-                  }
-                },
-                {
-                  "type": "section",
-                  "text": {
-                    "type": "mrkdwn",
-                    "text": "${{ env.REPORT_TEXT }}"
-                  }
-                }
-              ]
-            }
-        env:
-          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/check_tiny_models.yml
+++ b/.github/workflows/check_tiny_models.yml
@ -1,82 +0,0 @@
-name: Check Tiny Models
-
-on:
-  push:
-    branches:
-      - check_tiny_models*
-  repository_dispatch:
-  schedule:
-    - cron: "0 2 * * *"
-
-env:
-  TOKEN: ${{ secrets.TRANSFORMERS_HUB_BOT_HF_TOKEN }}
-
-jobs:
-  check_tiny_models:
-    name: Check tiny models
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Checkout transformers
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 2
-
-      # NOTE(review): second checkout of the same repository, this time without
-      # `fetch-depth`; it looks redundant with the step above — confirm and drop one.
-      - uses: actions/checkout@v4
-      - name: Set up Python 3.8
-        uses: actions/setup-python@v5
-        with:
-          # Semantic version range syntax or exact version of a Python version
-          python-version: '3.8'
-          # Optional - x64 or x86 architecture, defaults to x64
-          architecture: 'x64'
-
-      - name: Install
-        run: |
-          sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng cmake
-          pip install --upgrade pip
-          python -m pip install -U .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm,video,tf-cpu]
-          pip install tensorflow_probability
-          python -m pip install -U 'natten<0.15.0'
-
-      - name: Create all tiny models (locally)
-        run: |
-          python utils/create_dummy_models.py tiny_local_models --all --num_workers 2
-
-      - name: Local tiny model reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: tiny_local_model_creation_reports
-          path: tiny_local_models/reports
-
-      # GitHub-hosted runners have 2-core CPUs
-      - name: Run pipeline tests against all new (local) tiny models
-        run: |
-          OMP_NUM_THREADS=1 TRANSFORMERS_TINY_MODEL_PATH=tiny_local_models python -m pytest --max-worker-restart=0 -n 2 --dist=loadfile -s -rA --make-reports=tests_pipelines tests/models -m is_pipeline_test -k "test_pipeline_" | tee tests_output.txt
-
-      # Uploads the pytest reports written by `--make-reports=tests_pipelines`.
-      # The artifact name must differ from `tiny_local_model_creation_reports`, which
-      # an earlier step of this job already uploads: upload-artifact@v4 rejects a
-      # second upload under an existing name.
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: tiny_local_model_pipeline_test_reports
-          path: reports/tests_pipelines
-
-      - name: Create + Upload tiny models for new model architecture(s)
-        run: |
-          python utils/update_tiny_models.py --num_workers 2
-
-      - name: Full report
-        run: cat tiny_models/reports/tiny_model_creation_report.json
-
-      - name: Failure report
-        run: cat tiny_models/reports/simple_failed_report.txt
-
-      - name: Summary report
-        run: cat tiny_models/reports/tiny_model_summary.json
-
-      - name: New tiny model creation reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: tiny_model_creation_reports
-          path: tiny_models/reports
--- a/.github/workflows/doctest_job.yml
+++ b/.github/workflows/doctest_job.yml
@ -1,83 +0,0 @@
-name: Doctest job
-
-on:
-  workflow_call:
-    inputs:
-      job_splits:
-        required: true
-        type: string
-      split_keys:
-        required: true
-        type: string
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  RUN_SLOW: yes
-  OMP_NUM_THREADS: 16
-  MKL_NUM_THREADS: 16
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-
-jobs:
-  run_doctests:
-    name: " "
-    strategy:
-      max-parallel: 8  # 8 jobs at a time
-      fail-fast: false
-      matrix:
-        split_keys: ${{ fromJson(inputs.split_keys) }}
-    runs-on: 
-      group: aws-g4dn-4xlarge-cache
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[flax]
-
-      - name: GPU visibility
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        run: pip freeze
-
-      - name: Get doctest files
-        working-directory: /transformers
-        run: |
-          echo "${{ toJson(fromJson(inputs.job_splits)[matrix.split_keys]) }}" > doc_tests.txt
-          cat doc_tests.txt
-
-      # Replaces every `/` in the matrix key with `_` and exports the result as
-      # `split_keys`; later steps use it in report and artifact names.
-      - name: Set `split_keys`
-        shell: bash
-        run: |
-          echo "${{ matrix.split_keys }}"
-          split_keys=${{ matrix.split_keys }}
-          split_keys=${split_keys//'/'/'_'}
-          # Print the sanitized value (previously this echoed the literal word "split_keys").
-          echo "$split_keys"
-          echo "split_keys=$split_keys" >> $GITHUB_ENV
-
-      - name: Run doctests
-        working-directory: /transformers
-        run: |
-          cat doc_tests.txt
-          python3 -m pytest -v --make-reports doc_tests_gpu_${{ env.split_keys }} --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/doc_tests_gpu_${{ env.split_keys }}/failures_short.txt
-
-      - name: "Test suite reports artifacts: doc_tests_gpu_test_reports_${{ env.split_keys }}"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: doc_tests_gpu_test_reports_${{ env.split_keys }}
-          path: /transformers/reports/doc_tests_gpu_${{ env.split_keys }}
--- a/.github/workflows/doctests.yml
+++ b/.github/workflows/doctests.yml
@ -1,89 +0,0 @@
-name: Doctests
-
-on:
-  push:
-    branches:
-      - run_doctest*
-  repository_dispatch:
-  schedule:
-    - cron: "17 2 * * *"
-
-env:
-  NUM_SLICES: 3
-
-jobs:
-  setup:
-    name: Setup
-    runs-on: 
-      group: aws-g4dn-4xlarge-cache
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    outputs:
-      job_splits: ${{ steps.set-matrix.outputs.job_splits }}
-      split_keys: ${{ steps.set-matrix.outputs.split_keys }}
-    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: |
-          git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Check values for matrix
-        working-directory: /transformers
-        run: |
-          python3 utils/split_doctest_jobs.py
-          python3 utils/split_doctest_jobs.py --only_return_keys --num_splits ${{ env.NUM_SLICES }}
-
-      - id: set-matrix
-        working-directory: /transformers
-        name: Set values for matrix
-        run: |
-          echo "job_splits=$(python3 utils/split_doctest_jobs.py)" >> $GITHUB_OUTPUT
-          echo "split_keys=$(python3 utils/split_doctest_jobs.py --only_return_keys --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
-
-  call_doctest_job:
-    name: "Call doctest jobs"
-    needs: setup
-    strategy:
-      max-parallel: 1  # 1 split at a time (in `doctest_job.yml`, we set `8` to run 8 jobs at the same time)
-      fail-fast: false
-      matrix:
-        split_keys: ${{ fromJson(needs.setup.outputs.split_keys) }}
-    uses: ./.github/workflows/doctest_job.yml
-    with:
-      job_splits: ${{ needs.setup.outputs.job_splits }}
-      split_keys: ${{ toJson(matrix.split_keys) }}
-    secrets: inherit
-
-  send_results:
-    name: Send results to webhook
-    runs-on: ubuntu-22.04
-    if: always()
-    needs: [call_doctest_job]
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/download-artifact@v4
-      - name: Send message to Slack
-        env:
-          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          # Use `CI_SLACK_CHANNEL_DUMMY_TESTS` when doing experimentation
-          SLACK_REPORT_CHANNEL: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
-        run: |
-          pip install slack_sdk
-          python utils/notification_service_doc_tests.py
-
-      - name: "Upload results"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: doc_test_results
-          path: doc_test_results
--- a/.github/workflows/github-torch-hub.yml
+++ b/.github/workflows/github-torch-hub.yml
@ -0,0 +1,46 @@
+name: Torch hub integration
+
+on:
+  push:
+    branches:
+      - "*"
+
+jobs:
+  # Calls torch.hub.list / torch.hub.help against the pushed branch of huggingface/transformers.
+  torch_hub_integration:
+    runs-on: ubuntu-latest
+    steps:
+    # no checkout necessary here.
+    - name: Extract branch name
+      # Export BRANCH through the $GITHUB_ENV file; the deprecated `set-env` workflow
+      # command only worked with ACTIONS_ALLOW_UNSECURE_COMMANDS enabled.
+      run: echo "BRANCH=${GITHUB_REF#refs/heads/}" >> "$GITHUB_ENV"
+    - name: Check branch name
+      run: echo $BRANCH
+    - name: Set up Python
+      uses: actions/setup-python@v1
+      with:
+        python-version: "3.7"
+
+    - name: Loading cache
+      uses: actions/cache@v2
+      id: cache
+      with:
+        path: ~/.cache/pip
+        key: v0-torch_hub-${{ hashFiles('setup.py') }}
+
+    - name: Install dependencies
+      run: |
+        pip install --upgrade pip
+        # install torch-hub specific dependencies
+        pip install -e git+https://github.com/huggingface/transformers.git#egg=transformers[torchhub]
+        # no longer needed
+        pip uninstall -y transformers
+
+    - name: Torch hub list
+      run: |
+        python -c "import torch; print(torch.hub.list('huggingface/transformers:$BRANCH'))"
+
+    - name: Torch hub help
+      run: |
+        python -c "import torch; print(torch.hub.help('huggingface/transformers:$BRANCH', 'modelForSequenceClassification'))"
--- a/.github/workflows/model-templates.yml
+++ b/.github/workflows/model-templates.yml
@ -0,0 +1,70 @@
+name: Model templates runner
+
+on:
+  push:
+    paths:
+      - "src/**"
+      - "tests/**"
+      - ".github/**"
+      - "templates/**"
+  pull_request_target:
+    branches:
+      - master
+
+jobs:
+  run_tests_templates:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v1
+
+      - name: Install Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: 3.6
+
+      - name: Loading cache.
+        uses: actions/cache@v2
+        id: cache
+        with:
+          path: ~/.cache/pip
+          # The primary key carries the setup.py hash, so a dependency change misses
+          # the exact key and a fresh cache is saved. The restore key is a prefix
+          # fallback that still matches caches saved under the old static key.
+          key: v1.2-tests_templates-${{ hashFiles('setup.py') }}
+          restore-keys: |
+            v1.2-tests_templates
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install .[dev]
+      - name: Create model files
+        run: |
+          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
+          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
+          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
+          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
+          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
+          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
+          make style
+          python utils/check_table.py --fix_and_overwrite
+          python utils/check_dummies.py --fix_and_overwrite
+
+      - name: Run all non-slow tests
+        run: |
+          python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_templates tests/*template*
+
+      - name: Run style changes
+        run: |
+          git fetch origin master:master
+          make fixup
+
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_templates_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: run_all_tests_templates_test_reports
+          path: reports
--- a/.github/workflows/model_jobs.yml
+++ b/.github/workflows/model_jobs.yml
@ -1,142 +0,0 @@
-name: model jobs
-
-on:
-  workflow_call:
-    inputs:
-      folder_slices:
-        required: true
-        type: string
-      machine_type:
-        required: true
-        type: string
-      slice_id:
-        required: true
-        type: number
-      runner:
-        required: true
-        type: string
-      docker:
-        required: true
-        type: string
-      report_name_prefix:
-        required: false
-        default: run_models_gpu
-        type: string
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes
-  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
-  # This token is created under the bot `hf-transformers-bot`.
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  CUDA_VISIBLE_DEVICES: 0,1
-
-jobs:
-  run_models_gpu:
-    name: " "
-    strategy:
-      max-parallel: 8
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
-    runs-on:
-      group: '${{ inputs.machine_type }}'
-    container:
-      image: ${{ inputs.docker }}
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo input and matrix info
-        shell: bash
-        run: |
-          echo "${{ inputs.folder_slices }}"
-          echo "${{ matrix.folders }}"
-          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install -U datasets
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ inputs.machine_type }}"
-
-          if [ "${{ inputs.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ inputs.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Run all tests on GPU
-        working-directory: /transformers
-        run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Run test
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
--- a/.github/workflows/model_jobs_amd.yml
+++ b/.github/workflows/model_jobs_amd.yml
@ -1,128 +0,0 @@
-name: model jobs
-
-on:
-  workflow_call:
-    inputs:
-      folder_slices:
-        required: true
-        type: string
-      machine_type:
-        required: true
-        type: string
-      slice_id:
-        required: true
-        type: number
-      runner:
-        required: true
-        type: string
-      docker:
-        required: true
-        type: string
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes
-  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
-  # This token is created under the bot `hf-transformers-bot`.
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  CUDA_VISIBLE_DEVICES: 0,1
-
-jobs:
-  run_models_gpu:
-    name: " "
-    strategy:
-      max-parallel: 1  # For now, not to parallelize. Can change later if it works well.
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
-    runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
-    container:
-      image: ${{ inputs.docker }}
-      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo input and matrix info
-        shell: bash
-        run: |
-          echo "${{ inputs.folder_slices }}"
-          echo "${{ matrix.folders }}"
-          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install -U datasets
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-
-      - name: ROCM-SMI
-        run: |
-          rocm-smi
-
-      - name: ROCM-INFO
-        run: |
-          rocminfo  | grep "Agent" -A 14
-
-      - name: Show ROCR environment
-        run: |
-          echo "ROCR: $ROCR_VISIBLE_DEVICES"
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all tests on GPU
-        working-directory: /transformers
-        run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}  -m "not not_device_test"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Run test
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
--- a/.github/workflows/new_model_pr_merged_notification.yml
+++ b/.github/workflows/new_model_pr_merged_notification.yml
@ -1,68 +0,0 @@
-# Used to notify core maintainers about new model PR being merged
-name: New model PR merged notification
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - 'src/transformers/models/*/modeling_*'
-
-jobs:
-  notify_new_model:
-    name: Notify new model
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Check new model
-        shell: bash
-        run: |
-          python -m pip install gitpython
-          python -c 'from utils.pr_slow_ci_models import get_new_model; new_model = get_new_model(diff_with_last_commit=True); print(new_model)' | tee output.txt
-          echo "NEW_MODEL=$(tail -n 1 output.txt)" >> $GITHUB_ENV
-          echo "COMMIT_SHA=$(git log -1 --format=%H)" >> $GITHUB_ENV
-
-      - name: print commit sha
-        if: ${{ env.NEW_MODEL != ''}}
-        shell: bash
-        run: |
-          echo "$COMMIT_SHA"
-
-      - name: print new model
-        if: ${{ env.NEW_MODEL != ''}}
-        shell: bash
-        run: |
-          echo "$NEW_MODEL"
-
-      - name: Notify
-        if: ${{ env.NEW_MODEL != ''}}
-        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
-        with:
-          # Slack channel id, channel name, or user id to post message.
-          # See also: https://api.slack.com/methods/chat.postMessage#channels
-          channel-id: transformers-new-model-notification
-          # For posting a rich message using Block Kit
-          payload: |
-            {
-              "blocks": [
-                {
-                  "type": "header",
-                  "text": {
-                    "type": "plain_text",
-                    "text": "New model!",
-                    "emoji": true
-                  }
-                },
-                {
-                  "type": "section",
-                  "text": {
-                    "type": "mrkdwn",
-                    "text": "<https://github.com/huggingface/transformers/commit/${{ env.COMMIT_SHA }}|New model: ${{ env.NEW_MODEL }}> GH_ArthurZucker, GH_lysandrejik, GH_ydshieh\ncommit SHA: ${{ env.COMMIT_SHA }}"
-                  }
-                }
-              ]
-            }
-        env:
-          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/pr-style-bot.yml
+++ b/.github/workflows/pr-style-bot.yml
@ -1,34 +0,0 @@
-# To run this bot, comment "@bot /style" on a PR
-name: Style Bot
-
-on:
-  issue_comment:
-    types: [created]
-
-permissions:
-  contents: write
-  pull-requests: write
-
-jobs:
-  style:
-    uses: huggingface/huggingface_hub/.github/workflows/style-bot-action.yml@639ee721e149a281fe726a50a2cc1354b48bc463
-    with:
-      python_quality_dependencies: "[quality]"
-      style_command_type: "default"
-    secrets:
-      bot_token: ${{ secrets.GITHUB_TOKEN }}
-
-  check-outputs:
-    runs-on: ubuntu-latest
-    needs: style
-    steps:
-      - run: echo ${{ needs.style.outputs.pr_number }}
-      - run: echo ${{ needs.style.outputs.new_commit_sha }}
-
-  trigger:
-    needs: style
-    if: needs.style.outputs.new_commit_sha != ''
-    uses: "./.github/workflows/build_pr_documentation.yml"
-    with:
-      pr_number: ${{ needs.style.outputs.pr_number }}
-      commit_sha: ${{ needs.style.outputs.new_commit_sha }}
--- a/.github/workflows/push-important-models.yml
+++ b/.github/workflows/push-important-models.yml
@ -1,135 +0,0 @@
-name: Slow tests on important models (on Push - A10)
-
-on:
-  push:
-    branches: [ main ]
-
-env:
-  OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-
-jobs:
-  get_modified_models:
-    name: "Get all modified files"
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v4
-
-      - name: Get changed files
-        id: changed-files
-        uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
-        with:
-          files: src/transformers/models/**
-
-      - name: Run step if only the files listed above change
-        if: steps.changed-files.outputs.any_changed == 'true'
-        id: set-matrix
-        env:
-          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
-        run: |
-            model_arrays=()
-            for file in $ALL_CHANGED_FILES; do
-                model_path="${file#*models/}"
-                model_path="models/${model_path%%/*}"
-                if grep -qFx "$model_path" utils/important_models.txt; then
-                    # Append the file to the matrix string
-                    model_arrays+=("$model_path")
-                fi
-            done
-            matrix_string=$(printf '"%s", ' "${model_arrays[@]}" | sed 's/, $//')
-            echo "matrix=[$matrix_string]" >> $GITHUB_OUTPUT
-  test_modified_files:
-    needs: get_modified_models
-    name: Slow & FA2 tests
-    runs-on:
-      group: aws-g5-4xlarge-cache
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
-    strategy:
-      fail-fast: false
-      matrix:
-        model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}
-
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v4
-
-      - name: Install locally transformers & other libs
-        run: |
-          apt install sudo
-          sudo -H pip install --upgrade pip
-          sudo -H pip uninstall -y transformers
-          sudo -H pip install -U -e ".[testing]"
-          MAX_JOBS=4 pip install flash-attn --no-build-isolation
-          pip install bitsandbytes
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Show installed libraries and their versions
-        run: pip freeze
-
-      - name: Run FA2 tests
-        id: run_fa2_tests
-        run:
-          pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
-
-      - name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.model-name }}_fa2_tests
-          path: /transformers/reports/${{ matrix.model-name }}_fa2_tests
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
-          title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
-          status: ${{ steps.run_fa2_tests.conclusion}}
-          slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
-
-      - name: Run integration tests
-        id: run_integration_tests
-        if: always()
-        run:
-          pytest -rsfE -k "IntegrationTest"  --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
-
-      - name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: tests_integration_${{ matrix.model-name }}
-          path: /transformers/reports/tests_integration_${{ matrix.model-name }}
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
-          title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
-          status: ${{ steps.run_integration_tests.conclusion}}
-          slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
-
-      - name: Tailscale # In order to be able to SSH when a test fails
-        if: ${{ runner.debug == '1'}}
-        uses: huggingface/tailscale-action@v1
-        with:
-          authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
-          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
-          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-          waitForSSH: true
--- a/.github/workflows/release-conda.yml
+++ b/.github/workflows/release-conda.yml
@ -4,29 +4,26 @@ on:
  push:
    tags:
      - v*
-    branches:
-      - conda_*

 env:
  ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }}

 jobs:
  build_and_package:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

      - name: Install miniconda
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          auto-activate-base: false
-          python-version: 3.8
          activate-environment: "build-transformers"
          channels: huggingface

--- a/.github/workflows/self-comment-ci.yml
+++ b/.github/workflows/self-comment-ci.yml
@ -1,416 +0,0 @@
-name: PR comment GitHub CI
-
-on:
-  issue_comment:
-    types:
-      - created
-    branches-ignore:
-      - main
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow') }}
-  cancel-in-progress: true
-permissions: read-all
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes
-  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
-  # This token is created under the bot `hf-transformers-bot`.
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  CUDA_VISIBLE_DEVICES: 0,1
-
-jobs:
-  get-pr-number:
-    runs-on: ubuntu-22.04
-    name: Get PR number
-    # For security: only allow team members to run
-    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
-    outputs:
-      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
-    steps:
-      - name: Get PR number
-        shell: bash
-        run: |
-          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
-            echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
-          else
-            echo "PR_NUMBER=" >> $GITHUB_ENV
-          fi
-
-      - name: Check PR number
-        shell: bash
-        run: |
-          echo "${{ env.PR_NUMBER }}"
-
-      - name: Set PR number
-        id: set_pr_number
-        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
-
-  get-sha:
-    runs-on: ubuntu-22.04
-    needs: get-pr-number
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
-    outputs:
-      PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
-      PR_MERGE_SHA: ${{ steps.get_sha.outputs.PR_MERGE_SHA }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: "0"
-          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
-
-      - name: Get SHA (and verify timestamps against the issue comment date)
-        id: get_sha
-        env:
-          PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
-          COMMENT_DATE: ${{ github.event.comment.created_at }}
-        run: |
-            git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
-            git checkout refs/remotes/pull/$PR_NUMBER/head
-            echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
-            echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
-            git fetch origin refs/pull/$PR_NUMBER/merge:refs/remotes/pull/$PR_NUMBER/merge
-            git checkout refs/remotes/pull/$PR_NUMBER/merge
-            echo "PR_MERGE_SHA: $(git log -1 --format=%H)"
-            echo "PR_MERGE_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
-            PR_MERGE_COMMIT_TIMESTAMP=$(git log -1 --date=unix --format=%cd)
-            echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
-            COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
-            echo "COMMENT_DATE: $COMMENT_DATE"
-            echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
-            if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
-              echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
-              exit -1;
-            fi
-
-  # use a python script to handle this complex logic
-  # case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
-  # case 2: `run-slow model_1, model_2`
-  get-tests:
-    runs-on: ubuntu-22.04
-    needs: [get-pr-number, get-sha]
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
-    outputs:
-      models: ${{ steps.models_to_run.outputs.models }}
-      quantizations: ${{ steps.models_to_run.outputs.quantizations }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: "0"
-          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
-
-      - name: Verify merge commit SHA
-        env:
-          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
-        run: |
-            PR_MERGE_SHA=$(git log -1 --format=%H)
-            if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-              echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-              exit -1;
-            fi
-
-      - name: Get models to test
-        env:
-          PR_COMMENT: ${{ github.event.comment.body }}
-        run: |
-          python -m pip install GitPython
-          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
-          echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
-          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
-          echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV
-
-      - name: Show models to test
-        id: models_to_run
-        run: |
-          echo "${{ env.models }}"
-          echo "models=${{ env.models }}" >> $GITHUB_ENV
-          echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
-          echo "${{ env.quantizations }}"
-          echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT
-
-  reply_to_comment:
-    name: Reply to the comment
-    if: ${{ needs.get-tests.outputs.models != '[]'  || needs.get-tests.outputs.quantizations != '[]' }}
-    needs: [get-pr-number, get-tests]
-    permissions:
-      pull-requests: write
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Reply to the comment
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          MODELS: ${{ needs.get-tests.outputs.models }}
-          BODY: "\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
-        run: |
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-            -f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."
-
-  create_run:
-    name: Create run
-    if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
-    needs: [get-sha, get-tests, reply_to_comment]
-    permissions:
-      statuses: write
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Create Run
-        id: create_run
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
-          # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
-          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
-        run: |
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
-            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
-
-  run_models_gpu:
-    name: Run all tests for the model
-    if: ${{ needs.get-tests.outputs.models != '[]' }}
-    needs: [get-pr-number, get-sha, get-tests, create_run]
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.get-tests.outputs.models) }}
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-       group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo input and matrix info
-        shell: bash
-        run: |
-          echo "${{ matrix.folders }}"
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Checkout to PR merge commit
-        working-directory: /transformers
-        run: |
-          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git log -1 --format=%H
-
-      - name: Verify merge commit SHA
-        env:
-          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
-        working-directory: /transformers
-        run: |
-          PR_MERGE_SHA=$(git log -1 --format=%H)
-          if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-            exit -1;
-          fi
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all tests on GPU
-        working-directory: /transformers
-        run: |
-          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
-          echo $CUDA_VISIBLE_DEVICES
-          python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Make sure report directory exists
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-
-  run_quantization_torch_gpu:
-    name: Run all tests for a quantization
-    if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
-    needs: [get-pr-number, get-sha, get-tests, create_run]
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-quantization-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Checkout to PR merge commit
-        working-directory: /transformers
-        run: |
-          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git log -1 --format=%H
-
-      - name: Verify merge commit SHA
-        env:
-          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
-        working-directory: /transformers
-        run: |
-          PR_MERGE_SHA=$(git log -1 --format=%H)
-          if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-            exit -1;
-          fi
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run quantization tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Make sure report directory exists
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
-
-  update_run_status:
-    name: Update Check Run Status
-    needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
-    permissions:
-      statuses: write
-    if: ${{ always() && needs.create_run.result == 'success' }}
-    runs-on: ubuntu-22.04
-    env:
-      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
-      STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
-    steps:
-      - name: Get `run_models_gpu` job status
-        run: |
-          echo "${{ needs.run_models_gpu.result }}"
-          echo "${{ needs.run_quantization_torch_gpu.result }}"
-          echo $STATUS_OK
-          if [ "$STATUS_OK" = "true" ]; then
-            echo "STATUS=success" >> $GITHUB_ENV
-          else
-            echo "STATUS=failure" >> $GITHUB_ENV
-          fi
-
-      - name: Update PR commit statuses
-        run: |
-          echo "${{ needs.run_models_gpu.result }}"
-          echo "${{ env.STATUS }}"
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
-            -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"
--- a/.github/workflows/self-nightly-caller.yml
+++ b/.github/workflows/self-nightly-caller.yml
@ -1,43 +0,0 @@
-name: Self-hosted runner (nightly-ci)
-
-
-on:
-  repository_dispatch:
-  schedule:
-    - cron: "17 2 * * *"
-  push:
-    branches:
-      - run_nightly_ci*
-
-jobs:
-  build_nightly_ci_images:
-    name: Build Nightly CI Docker Images
-    if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci'))
-    uses: ./.github/workflows/build-nightly-ci-docker-images.yml
-    secrets: inherit
-
-  model-ci:
-    name: Model CI
-    needs: [build_nightly_ci_images]
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-past-future"
-      runner: ci
-      docker: huggingface/transformers-all-latest-torch-nightly-gpu
-      ci_event: Nightly CI
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    needs: [build_nightly_ci_images]
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-past-future"
-      runner: ci
-      # test deepspeed nightly build with the latest release torch
-      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
-      ci_event: Nightly CI
-      working-directory-prefix: /workspace
-    secrets: inherit
--- a/.github/workflows/self-nightly-past-ci-caller.yml
+++ b/.github/workflows/self-nightly-past-ci-caller.yml
@ -1,99 +0,0 @@
-name: Self-hosted runner (nightly-past-ci-caller)
-
-on:
-  schedule:
-    - cron: "17 2,14 * * *"
-  push:
-    branches:
-      - run_past_ci*
-
-jobs:
-  get_number:
-    name: Get number
-    runs-on: ubuntu-22.04
-    outputs:
-      run_number: ${{ steps.get_number.outputs.run_number }}
-    steps:
-      - name: Get number
-        id: get_number
-        run: |
-          echo "${{ github.run_number }}"
-          echo "$(python3 -c 'print(int(${{ github.run_number }}) % 10)')"
-          echo "run_number=$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" >> $GITHUB_OUTPUT
-
-  run_past_ci_tensorflow_2-11:
-    name: TensorFlow 2.11
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 3 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: tensorflow
-      version: "2.11"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
-  run_past_ci_tensorflow_2-10:
-    name: TensorFlow 2.10
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 4 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: tensorflow
-      version: "2.10"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
-  run_past_ci_tensorflow_2-9:
-    name: TensorFlow 2.9
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 5 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: tensorflow
-      version: "2.9"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
-  run_past_ci_tensorflow_2-8:
-    name: TensorFlow 2.8
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 6 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: tensorflow
-      version: "2.8"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
-  run_past_ci_tensorflow_2-7:
-    name: TensorFlow 2.7
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 7 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: tensorflow
-      version: "2.7"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
-  run_past_ci_tensorflow_2-6:
-    name: TensorFlow 2.6
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 8 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: tensorflow
-      version: "2.6"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
-  run_past_ci_tensorflow_2-5:
-    name: TensorFlow 2.5
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 9 &&  (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: tensorflow
-      version: "2.5"
-      sha: ${{ github.sha }}
-    secrets: inherit
--- a/.github/workflows/self-past-caller.yml
+++ b/.github/workflows/self-past-caller.yml
@ -1,40 +0,0 @@
-name: Self-hosted runner (past-ci)
-
-
-on:
-  workflow_call:
-    inputs:
-      framework:
-        required: true
-        type: string
-      version:
-        required: true
-        type: string
-      # Use this to control the commit to test against
-      sha:
-        default: 'main'
-        required: false
-        type: string
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-past-future"
-      runner: past-ci
-      docker: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
-      ci_event: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-past-future"
-      runner: past-ci
-      docker: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
-      ci_event: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
-    secrets: inherit
--- a/.github/workflows/self-push-amd-mi210-caller.yml
+++ b/.github/workflows/self-push-amd-mi210-caller.yml
@ -1,25 +0,0 @@
-name: Self-hosted runner (AMD mi210 CI caller)
-
-on:
-  #workflow_run:
-  #  workflows: ["Self-hosted runner (push-caller)"]
-  #  branches: ["main"]
-  #  types: [completed]
-  push:
-    branches:
-      - run_amd_push_ci_caller*
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  run_amd_ci:
-    name: AMD mi210
-    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
-    uses: ./.github/workflows/self-push-amd.yml
-    with:
-      gpu_flavor: mi210
-    secrets: inherit
--- a/.github/workflows/self-push-amd-mi250-caller.yml
+++ b/.github/workflows/self-push-amd-mi250-caller.yml
@ -1,25 +0,0 @@
-name: Self-hosted runner (AMD mi250 CI caller)
-
-on:
-  #workflow_run:
-  #  workflows: ["Self-hosted runner (push-caller)"]
-  #  branches: ["main"]
-  #  types: [completed]
-  push:
-    branches:
-      - run_amd_push_ci_caller*
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  run_amd_ci:
-    name: AMD mi250
-    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
-    uses: ./.github/workflows/self-push-amd.yml
-    with:
-      gpu_flavor: mi250
-    secrets: inherit
--- a/.github/workflows/self-push-amd-mi300-caller.yml
+++ b/.github/workflows/self-push-amd-mi300-caller.yml
@ -1,25 +0,0 @@
-name: Self-hosted runner (AMD mi300 CI caller)
-
-on:
-  #workflow_run:
-  #  workflows: ["Self-hosted runner (push-caller)"]
-  #  branches: ["main"]
-  #  types: [completed]
-  push:
-    branches:
-      - run_amd_push_ci_caller*
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  run_amd_ci:
-    name: AMD mi300
-    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci'))))
-    uses: ./.github/workflows/self-push-amd.yml
-    with:
-      gpu_flavor: mi300
-    secrets: inherit
--- a/.github/workflows/self-push-amd.yml
+++ b/.github/workflows/self-push-amd.yml
@ -1,334 +0,0 @@
-name: Self-hosted runner AMD GPU (push)
-
-on:
-  workflow_call:
-    inputs:
-      gpu_flavor:
-        required: true
-        type: string
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  PYTEST_TIMEOUT: 60
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-
-jobs:
-  check_runner_status:
-    name: Check Runner Status
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Checkout transformers
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 2
-
-      - name: Check Runner Status
-        run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-
-  check_runners:
-    name: Check Runners
-    needs: check_runner_status
-    strategy:
-      matrix:
-        machine_type: [single-gpu, multi-gpu]
-    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
-    container:
-      image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now
-      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: ROCM-SMI
-        run: |
-          rocm-smi
-      - name: ROCM-INFO
-        run: |
-          rocminfo  | grep "Agent" -A 14
-      - name: Show ROCR environment
-        run: |
-          echo "ROCR: $ROCR_VISIBLE_DEVICES"
-
-  setup_gpu:
-    name: Setup
-    needs: check_runners
-    strategy:
-      matrix:
-        machine_type: [single-gpu, multi-gpu]
-    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
-    container:
-      image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now
-      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      test_map: ${{ steps.set-matrix.outputs.test_map }}
-    env:
-      # `CI_BRANCH_PUSH`: The branch name from the push event
-      # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
-      # `CI_SHA_PUSH`: The commit SHA from the push event
-      # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
-    steps:
-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
-        # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
-        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
-
-      - name: print environment variables
-        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
-
-      - name: Update clone using environment variables
-        working-directory: /transformers
-        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
-
-      - name: Cleanup
-        working-directory: /transformers
-        run: |
-          rm -rf tests/__pycache__
-          rm -rf tests/models/__pycache__
-          rm -rf reports
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Fetch the tests to run
-        working-directory: /transformers
-        # TODO: add `git-python` in the docker images
-        run: |
-          pip install --upgrade git-python
-          python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
-
-      - name: Report fetched tests
-        uses: actions/upload-artifact@v4
-        with:
-          name: test_fetched
-          path: /transformers/test_preparation.txt
-
-      - id: set-matrix
-        name: Organize tests into models
-        working-directory: /transformers
-        # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc.
-        # The `test_map` is used to get the actual identified test files under each key.
-        # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail)
-        run: |
-          if [ -f test_map.json ]; then
-              keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
-              test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
-          else
-              keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
-              test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
-          fi
-          echo $keys
-          echo $test_map
-          echo "matrix=$keys" >> $GITHUB_OUTPUT
-          echo "test_map=$test_map" >> $GITHUB_OUTPUT
-
-  run_models_gpu:
-    name: Model tests
-    needs: setup_gpu
-    # `dummy` means there is no test to run
-    if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
-        machine_type: [single-gpu, multi-gpu]
-    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
-    container:
-      image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now
-      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
-    steps:
-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
-        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
-
-      - name: print environment variables
-        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
-
-      - name: Update clone using environment variables
-        working-directory: /transformers
-        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: ROCM-SMI
-        run: |
-          rocm-smi
-      - name: ROCM-INFO
-        run: |
-          rocminfo  | grep "Agent" -A 14
-      - name: Show ROCR environment
-        run: |
-          echo "ROCR: $ROCR_VISIBLE_DEVICES"
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all non-slow selected tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-
-  send_results:
-    name: Send results to webhook
-    runs-on: ubuntu-22.04
-    if: always()
-    needs: [
-        check_runner_status,
-        check_runners,
-        setup_gpu,
-        run_models_gpu,
-#        run_tests_torch_cuda_extensions_single_gpu,
-#        run_tests_torch_cuda_extensions_multi_gpu
-    ]
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
-    steps:
-      - name: Preliminary job status
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
-        run: |
-          echo "Runner availability: ${{ needs.check_runner_status.result }}"
-          echo "Setup status: ${{ needs.setup_gpu.result }}"
-          echo "Runner status: ${{ needs.check_runners.result }}"
-
-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
-        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
-
-      - name: print environment variables
-        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
-
-      - uses: actions/checkout@v4
-        # To avoid failure when multiple commits are merged into `main` in a short period of time.
-        # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...
-        # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
-        with:
-          fetch-depth: 20
-
-      - name: Update clone using environment variables
-        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
-
-      - uses: actions/download-artifact@v4
-      - name: Send message to Slack
-        env:
-          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
-          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
-          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
-          CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
-          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
-          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
-          CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
-          CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
-          CI_SHA: ${{ env.CI_SHA }}
-          RUNNER_STATUS: ${{ needs.check_runner_status.result }}
-          RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
-          SETUP_STATUS: ${{ needs.setup_gpu.result }}
-
-        # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change
-        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
-        run: |
-          pip install huggingface_hub
-          pip install slack_sdk
-          pip show slack_sdk
-          python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"
--- a/.github/workflows/self-push-caller.yml
+++ b/.github/workflows/self-push-caller.yml
@ -1,54 +0,0 @@
-# Used to trigger self-push CI
-name: Self-hosted runner (push-caller)
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  check-for-setup:
-      runs-on: ubuntu-22.04
-      name: Check if setup was changed
-      outputs:
-        changed: ${{ steps.was_changed.outputs.changed }}
-      steps:
-        - uses: actions/checkout@v4
-          with: 
-            fetch-depth: "2"
-        
-        - name: Get changed files
-          id: changed-files
-          uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
-        
-        - name: Was setup changed 
-          id: was_changed
-          run: |
-            for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
-              if [ `basename "${file}"` = "setup.py" ]; then
-                echo "changed=1" >> $GITHUB_OUTPUT
-              fi
-            done
-
-  build-docker-containers:
-    needs: check-for-setup
-    if: (github.event_name == 'push') && (needs.check-for-setup.outputs.changed == '1')
-    uses: ./.github/workflows/build-docker-images.yml
-    with:
-      image_postfix: "-push-ci"
-    secrets: inherit
-
-  run_push_ci:
-    name: Trigger Push CI
-    runs-on: ubuntu-22.04
-    if: ${{ always() }}
-    needs: build-docker-containers
-    steps:
-      - name: Trigger push CI via workflow_run
-        run: echo "Trigger push CI via workflow_run"
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@ -1,652 +1,275 @@
 name: Self-hosted runner (push)

 on:
-  workflow_run:
-    workflows: ["Self-hosted runner (push-caller)"]
-    branches: ["main"]
-    types: [completed]
  push:
    branches:
+      - master
      - ci_*
-      - ci-*
    paths:
      - "src/**"
      - "tests/**"
      - ".github/**"
      - "templates/**"
-      - "utils/**"
+  # pull_request:
  repository_dispatch:

-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  PYTEST_TIMEOUT: 60
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  CUDA_VISIBLE_DEVICES: 0,1

 jobs:
-  setup:
-    name: Setup
-    strategy:
-      matrix:
-        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu-push-ci
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      test_map: ${{ steps.set-matrix.outputs.test_map }}
-    env:
-      # `CI_BRANCH_PUSH`: The branch name from the push event
-      # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
-      # `CI_SHA_PUSH`: The commit SHA from the push event
-      # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
+  run_tests_torch_gpu:
+    runs-on: [self-hosted, gpu, single-gpu]
    steps:
-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
-        # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
+      - uses: actions/checkout@v2
+      - name: Python version
        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
+          which python
+          python --version
+          pip --version

-      - name: print environment variables
-        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
+      - name: Current dir
+        run: pwd
+      - run: nvidia-smi

-      - name: Update clone using environment variables
-        working-directory: /transformers
-        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
-
-      - name: Cleanup
-        working-directory: /transformers
-        run: |
-          rm -rf tests/__pycache__
-          rm -rf tests/models/__pycache__
-          rm -rf reports
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Fetch the tests to run
-        working-directory: /transformers
-        # TODO: add `git-python` in the docker images
-        run: |
-          pip install --upgrade git-python
-          python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
-
-      - name: Report fetched tests
-        uses: actions/upload-artifact@v4
+      - name: Loading cache.
+        uses: actions/cache@v2
+        id: cache
        with:
-          name: test_fetched
-          path: /transformers/test_preparation.txt
+          path: .env
+          key: v1.1-tests_torch_gpu-${{ hashFiles('setup.py') }}

-      - id: set-matrix
-        name: Organize tests into models
-        working-directory: /transformers
-        # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc.
-        # The `test_map` is used to get the actual identified test files under each key.
-        # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail)
+      - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
        run: |
-          if [ -f test_map.json ]; then
-              keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
-              test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
-          else
-              keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
-              test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
-          fi
-          echo $keys
-          echo $test_map
-          echo "matrix=$keys" >> $GITHUB_OUTPUT
-          echo "test_map=$test_map" >> $GITHUB_OUTPUT
+          python -m venv .env
+          source .env/bin/activate
+          which python
+          python --version
+          pip --version

-  run_tests_single_gpu:
-    name: Model tests
-    needs: setup
-    # `dummy` means there is no test to run
-    if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
-        machine_type: [aws-g4dn-2xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu-push-ci
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
-    steps:
-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
+      - name: Install dependencies
        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
+          source .env/bin/activate
+          pip install --upgrade pip
+          pip install .[torch,sklearn,testing,onnxruntime,sentencepiece]
+          pip install git+https://github.com/huggingface/datasets
+          pip install pandas torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu102.html

-      - name: print environment variables
+      - name: Are GPUs recognized by our DL frameworks
        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
+          source .env/bin/activate
+          python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
+          python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"

-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
+#      - name: Create model files
+#        run: |
+#          source .env/bin/activate
+#          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
+#          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
+#          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
+#          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model

-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Update clone using environment variables
-        working-directory: /transformers
-        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all non-slow selected tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
-
-  run_tests_multi_gpu:
-    name: Model tests
-    needs: setup
-    # `dummy` means there is no test to run
-    if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
-        machine_type: [aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu-push-ci
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
-    steps:
-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
-        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
-
-      - name: print environment variables
-        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Update clone using environment variables
-        working-directory: /transformers
-        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all non-slow selected tests on GPU
+      - name: Run all non-slow tests on GPU
        env:
-          MKL_SERVICE_FORCE_INTEL: 1
-        working-directory: /transformers
+          OMP_NUM_THREADS: 1
+          CUDA_VISIBLE_DEVICES: 0
        run: |
-          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
+          source .env/bin/activate
+          python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_gpu tests

      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
-
-  run_tests_torch_cuda_extensions_single_gpu:
-    name: Torch CUDA extension tests
-    needs: setup
-    if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [aws-g4dn-2xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
-    steps:
-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
-        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
-
-      - name: print environment variables
-        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /workspace/transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Update clone using environment variables
-        working-directory: /workspace/transformers
-        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /workspace/transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Remove cached torch extensions
-        run: rm -rf /github/home/.cache/torch_extensions/
-
-      # To avoid unknown test failures
-      - name: Pre build DeepSpeed *again*
-        working-directory: /workspace
-        run: |
-          python3 -m pip uninstall -y deepspeed
-          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /workspace/transformers
-        run: |
-          python utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /workspace/transformers
-        run: pip freeze
-
-      - name: Run all non-slow selected tests on GPU
-        working-directory: /workspace/transformers
-        # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
-        run: |
-          python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
+        run: cat reports/tests_torch_gpu_failures_short.txt
+        
+      - name: Test suite reports artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
        with:
-          name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
-          path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+          name: run_all_tests_torch_gpu_test_reports
+          path: reports
+                  

-  run_tests_torch_cuda_extensions_multi_gpu:
-    name: Torch CUDA extension tests
-    needs: setup
-    if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
+  run_tests_tf_gpu:
+    runs-on: [self-hosted, gpu, single-gpu]
    steps:
-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
+      # Check out the repository under test (v2 of this action targets a retired Node runtime).
+      - uses: actions/checkout@v4
+      - name: Python version
        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
+          which python
+          python --version
+          pip --version
+      - name: Current dir
+        run: pwd
+      - run: nvidia-smi

-      - name: print environment variables
-        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /workspace/transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Update clone using environment variables
-        working-directory: /workspace/transformers
-        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /workspace/transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Remove cached torch extensions
-        run: rm -rf /github/home/.cache/torch_extensions/
-
-      # To avoid unknown test failures
-      - name: Pre build DeepSpeed *again*
-        working-directory: /workspace
-        run: |
-          python3 -m pip uninstall -y deepspeed
-          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /workspace/transformers
-        run: |
-          python utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /workspace/transformers
-        run: pip freeze
-
-      - name: Run all non-slow selected tests on GPU
-        working-directory: /workspace/transformers
-        # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
-        run: |
-          python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+      - name: Loading cache.
+        uses: actions/cache@v2
+        id: cache
        with:
-          name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
-          path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+          path: .env
+          key: v1.1-tests_tf_gpu-${{ hashFiles('setup.py') }}

-  send_results:
-    name: Send results to webhook
-    runs-on: ubuntu-22.04
-    if: always()
-    needs: [
-        setup,
-        run_tests_single_gpu,
-        run_tests_multi_gpu,
-        run_tests_torch_cuda_extensions_single_gpu,
-        run_tests_torch_cuda_extensions_multi_gpu
-    ]
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
-    steps:
-      - name: Preliminary job status
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
+      - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
        run: |
-          echo "Setup status: ${{ needs.setup.result }}"
+          python -m venv .env
+          source .env/bin/activate
+          which python
+          python --version
+          pip --version

-      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
-      # We also take into account the `push` event (we might want to test some changes in a branch)
-      - name: Prepare custom environment variables
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
+      - name: Install dependencies
        run: |
-          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
-          echo $CI_BRANCH_PUSH
-          echo $CI_BRANCH_WORKFLOW_RUN
-          echo $CI_SHA_PUSH
-          echo $CI_SHA_WORKFLOW_RUN
-          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
-          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
+          source .env/bin/activate
+          pip install --upgrade pip
+          pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
+          pip install git+https://github.com/huggingface/datasets

-      - name: print environment variables
+      - name: Are GPUs recognized by our DL frameworks
        run: |
-          echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
-          echo "env.CI_SHA = ${{ env.CI_SHA }}"
+          source .env/bin/activate
+          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
+          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"

-      - uses: actions/checkout@v4
-        # To avoid failure when multiple commits are merged into `main` in a short period of time.
-        # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...
-        # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
-        with:
-          fetch-depth: 20
-
-      - name: Update clone using environment variables
+      - name: Create model files
        run: |
-          echo "original branch = $(git branch --show-current)"
-          git fetch && git checkout ${{ env.CI_BRANCH }}
-          echo "updated branch = $(git branch --show-current)"
-          git checkout ${{ env.CI_SHA }}
-          echo "log = $(git log -n 1)"
+          source .env/bin/activate
+#          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
+#          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
+#          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
+#          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model

-      - uses: actions/download-artifact@v4
-      - name: Send message to Slack
+      - name: Run all non-slow tests on GPU
        env:
-          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
-          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
-          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
-          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
-          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          CI_EVENT: push
-          CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
-          CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
-          CI_SHA: ${{ env.CI_SHA }}
-          SETUP_STATUS: ${{ needs.setup.result }}
-
-        # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
-        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
+          OMP_NUM_THREADS: 1
+          CUDA_VISIBLE_DEVICES: 0
        run: |
-          pip install huggingface_hub
-          pip install slack_sdk
-          pip show slack_sdk
-          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
+          source .env/bin/activate
+          python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_gpu tests
+
+      # `always()` makes this step run even when an earlier step failed, so the short
+      # failure report is printed for failing runs as well.
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_tf_gpu_failures_short.txt
+
+      # Upload the whole `reports` directory, again regardless of earlier step results.
+      # upload-artifact v1/v2 are no longer accepted on github.com, hence v4.
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: run_all_tests_tf_gpu_test_reports
+          path: reports
+
+  # Non-slow test suite with the PyTorch extras, on a self-hosted runner labelled `multi-gpu`.
+  # NOTE(review): the v4 actions used below presumably need a runner recent enough to
+  # provide Node 20 - confirm against the self-hosted runner version.
+  run_tests_torch_multi_gpu:
+    runs-on: [self-hosted, gpu, multi-gpu]
+    steps:
+      - uses: actions/checkout@v4
+
+      # Diagnostics: interpreter, working directory and GPU state before anything is installed.
+      - name: Python version
+        run: |
+          which python
+          python --version
+          pip --version
+
+      - name: Current dir
+        run: pwd
+
+      - run: nvidia-smi
+
+      # Cache the virtualenv directory; the key changes whenever setup.py changes.
+      - name: Loading cache.
+        uses: actions/cache@v4
+        id: cache
+        with:
+          path: .env
+          key: v1.1-tests_torch_multi_gpu-${{ hashFiles('setup.py') }}
+
+      - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+        run: |
+          python -m venv .env
+          source .env/bin/activate
+          which python
+          python --version
+          pip --version
+
+      - name: Install dependencies
+        run: |
+          source .env/bin/activate
+          pip install --upgrade pip
+          # Quoted so the shell never treats the [extras] list as a glob pattern.
+          pip install ".[torch,sklearn,testing,onnxruntime,sentencepiece]"
+          pip install git+https://github.com/huggingface/datasets
+          pip install pandas torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu102.html
+
+      - name: Are GPUs recognized by our DL frameworks
+        run: |
+          source .env/bin/activate
+          python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
+          python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
+
+      - name: Run all non-slow tests on GPU
+        env:
+          # Environment values are strings; quote so no YAML loader retypes them.
+          OMP_NUM_THREADS: "1"
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
+
+      # `always()` makes the two reporting steps run even when an earlier step failed.
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_torch_multi_gpu_failures_short.txt
+
+      # upload-artifact v1/v2 are no longer accepted on github.com, hence v4.
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: run_all_tests_torch_multi_gpu_test_reports
+          path: reports
+
+  # Non-slow test suite with the TensorFlow extras, on a self-hosted runner labelled `multi-gpu`.
+  # NOTE(review): the v4 actions used below presumably need a runner recent enough to
+  # provide Node 20 - confirm against the self-hosted runner version.
+  run_tests_tf_multi_gpu:
+    runs-on: [self-hosted, gpu, multi-gpu]
+    steps:
+      - uses: actions/checkout@v4
+
+      # Diagnostics: interpreter, working directory and GPU state before anything is installed.
+      - name: Python version
+        run: |
+          which python
+          python --version
+          pip --version
+
+      - name: Current dir
+        run: pwd
+
+      - run: nvidia-smi
+
+      # Cache the virtualenv directory; the key changes whenever setup.py changes.
+      - name: Loading cache.
+        uses: actions/cache@v4
+        id: cache
+        with:
+          path: .env
+          key: v1.1-tests_tf_multi_gpu-${{ hashFiles('setup.py') }}
+
+      - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+        run: |
+          python -m venv .env
+          source .env/bin/activate
+          which python
+          python --version
+          pip --version
+
+      - name: Install dependencies
+        run: |
+          source .env/bin/activate
+          pip install --upgrade pip
+          # Quoted so the shell never treats the [extras] list as a glob pattern.
+          pip install ".[tf,sklearn,testing,onnxruntime,sentencepiece]"
+          pip install git+https://github.com/huggingface/datasets
+
+      - name: Are GPUs recognized by our DL frameworks
+        run: |
+          source .env/bin/activate
+          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
+          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
+
+      - name: Run all non-slow tests on GPU
+        env:
+          # Environment values are strings; quote so no YAML loader retypes them.
+          OMP_NUM_THREADS: "1"
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
+
+      # `always()` makes the two reporting steps run even when an earlier step failed.
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_tf_multi_gpu_failures_short.txt
+
+      # upload-artifact v1/v2 are no longer accepted on github.com, hence v4.
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: run_all_tests_tf_multi_gpu_test_reports
+          path: reports
+          
--- a/.github/workflows/self-scheduled-amd-caller.yml
+++ b/.github/workflows/self-scheduled-amd-caller.yml
@ -1,14 +0,0 @@
-name: Self-hosted runner (AMD scheduled CI caller)
-
-on:
-  schedule:
-    - cron: "17 2 * * *"
-
-jobs:
-  run_scheduled_amd_ci:
-    name: Trigger Scheduled AMD CI
-    runs-on: ubuntu-22.04
-    if: ${{ always() }}
-    steps:
-      - name: Trigger scheduled AMD CI via workflow_run
-        run: echo "Trigger scheduled AMD CI via workflow_run"
--- a/.github/workflows/self-scheduled-amd-mi250-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml
@ -1,59 +0,0 @@
-name: Self-hosted runner (AMD mi250 scheduled CI caller)
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_scheduled_ci_caller*
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
--- a/.github/workflows/self-scheduled-amd-mi300-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi300-caller.yml
@ -1,63 +0,0 @@
-name: Self-hosted runner scale set (AMD mi300 scheduled CI caller)
-
-# Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
-# For example, 1gpu scale set: amd-mi300-ci-1gpu
-#              2gpu scale set: amd-mi300-ci-2gpu
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_scheduled_ci_caller*
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi300-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi300
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi300-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi300
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi300-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi300
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi300-ci
-      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi300
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
--- a/.github/workflows/self-scheduled-caller.yml
+++ b/.github/workflows/self-scheduled-caller.yml
@ -1,119 +0,0 @@
-name: Self-hosted runner (scheduled)
-
-
-on:
-  repository_dispatch:
-  schedule:
-    - cron: "17 2 * * *"
-  push:
-    branches:
-      - run_scheduled_ci*
-  workflow_dispatch:
-    inputs:
-      prev_workflow_run_id:
-        description: 'previous workflow run id to compare'
-        type: string
-        required: false
-        default: ""
-      other_workflow_run_id:
-        description: 'other workflow run id to compare'
-        type: string
-        required: false
-        default: ""
-
-
-# Used for `push` to easily modiffy the target workflow runs to compare against
-env:
-    prev_workflow_run_id: ""
-    other_workflow_run_id: ""
-
-
-jobs:
-  setup:
-    name: Setup
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Setup
-        run: |
-          mkdir "setup_values"
-          echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
-          echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: setup_values
-          path: setup_values
-
-  model-ci:
-    name: Model CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-daily-models"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
-      report_repo_id: hf-internal-testing/transformers_daily_ci
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-pipeline-torch"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-gpu
-      ci_event: Daily CI
-      report_repo_id: hf-internal-testing/transformers_daily_ci
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-examples"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
-      report_repo_id: hf-internal-testing/transformers_daily_ci
-    secrets: inherit
-
-  trainer-fsdp-ci:
-    name: Trainer/FSDP CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_trainer_and_fsdp_gpu
-      slack_report_channel: "#transformers-ci-daily-training"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
-      report_repo_id: hf-internal-testing/transformers_daily_ci
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-training"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
-      ci_event: Daily CI
-      working-directory-prefix: /workspace
-      report_repo_id: hf-internal-testing/transformers_daily_ci
-    secrets: inherit
-
-  quantization-ci:
-    name: Quantization CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_quantization_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-quantization"
-      runner: daily-ci
-      docker: huggingface/transformers-quantization-latest-gpu
-      ci_event: Daily CI
-      report_repo_id: hf-internal-testing/transformers_daily_ci
-    secrets: inherit
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@ -1,538 +1,356 @@
+# configuration notes:
+#
+# - `source .env/bin/activate` currently has to be run first in each step. Otherwise
+# the step uses the system-wide python interpreter.
+
 name: Self-hosted runner (scheduled)

-# Note that each job's dependencies go into a corresponding docker file.
-#
-# For example for `run_torch_cuda_extensions_gpu` the docker image is
-# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
-# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
-
 on:
-  workflow_call:
-    inputs:
-      job:
-        required: true
-        type: string
-      slack_report_channel:
-        required: true
-        type: string
-      runner:
-        required: true
-        type: string
-      docker:
-        required: true
-        type: string
-      ci_event:
-        required: true
-        type: string
-      working-directory-prefix:
-        default: ''
-        required: false
-        type: string
-      report_repo_id:
-        required: true
-        type: string
-
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes
-  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
-  # This token is created under the bot `hf-transformers-bot`.
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  CUDA_VISIBLE_DEVICES: 0,1
-  NUM_SLICES: 2
+  repository_dispatch:
+  schedule:
+    - cron: "0 0 * * *"

 jobs:
-  setup:
-    if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu", "run_quantization_torch_gpu"]'), inputs.job)
-    name: Setup
-    strategy:
-      matrix:
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    outputs:
-      folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
-      slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
-      quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
+  run_all_tests_torch_gpu:
+    runs-on: [self-hosted, gpu, single-gpu]
    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: |
-          git fetch && git checkout ${{ github.sha }}
+      - uses: actions/checkout@v2

-      - name: Cleanup
-        working-directory: /transformers
-        run: |
-          rm -rf tests/__pycache__
-          rm -rf tests/models/__pycache__
-          rm -rf reports
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - id: set-matrix
-        if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
-        name: Identify models to test
-        working-directory: /transformers/tests
-        run: |
-          if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
-            echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
-            echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
-          elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
-            echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
-            echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
-          fi
-
-      - id: set-matrix-quantization
-        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
-        name: Identify quantization method to test
-        working-directory: /transformers/tests
-        run: |
-          echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ;  print(d)')" >> $GITHUB_OUTPUT
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-  run_models_gpu:
-    if: ${{ inputs.job == 'run_models_gpu' }}
-    name: " "
-    needs: setup
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
-        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
-    uses: ./.github/workflows/model_jobs.yml
-    with:
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      machine_type: ${{ matrix.machine_type }}
-      slice_id: ${{ matrix.slice_id }}
-      runner: ${{ inputs.runner }}
-      docker: ${{ inputs.docker }}
-    secrets: inherit
-
-  run_trainer_and_fsdp_gpu:
-    if: ${{ inputs.job == 'run_trainer_and_fsdp_gpu' }}
-    name: " "
-    needs: setup
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
-        slice_id: [0, 1]
-    uses: ./.github/workflows/model_jobs.yml
-    with:
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      machine_type: ${{ matrix.machine_type }}
-      slice_id: ${{ matrix.slice_id }}
-      runner: ${{ inputs.runner }}
-      docker: ${{ inputs.docker }}
-      report_name_prefix: run_trainer_and_fsdp_gpu
-    secrets: inherit
-
-  run_pipelines_torch_gpu:
-    if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
-    name: PyTorch pipelines
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-pytorch-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Run all pipeline tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+      - name: Loading cache.
+        uses: actions/cache@v2
+        id: cache
        with:
-          name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
+          path: .env
+          # Key follows the same compact `v1.1-<job>-<setup.py hash>` form as the other cache keys (no embedded spaces).
+          key: v1.1-slow_tests_torch_gpu-${{ hashFiles('setup.py') }}

-  run_examples_gpu:
-    if: ${{ inputs.job == 'run_examples_gpu' }}
-    name: Examples directory
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [aws-g4dn-4xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
+      - name: Python version
        run: |
-          nvidia-smi
+          which python
+          python --version
+          pip --version

-      - name: Environment
-        working-directory: /transformers
+      - name: Current dir
+        run: pwd
+      - run: nvidia-smi
+
+      - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+        if: steps.cache.outputs.cache-hit != 'true'
        run: |
-          python3 utils/print_env.py
+          python -m venv .env
+          source .env/bin/activate
+          which python
+          python --version
+          pip --version

-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
+      - name: Install dependencies
        run: |
-          echo "${{ matrix.machine_type }}"
+          source .env/bin/activate
+          pip install --upgrade pip
+          # Quote the extras spec so the shell cannot glob-expand the [...] bracket expression.
+          pip install ".[torch,sklearn,testing,onnxruntime,sentencepiece]"
+          pip install git+https://github.com/huggingface/datasets
+          pip list

-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Run examples tests on GPU
-        working-directory: /transformers
+      - name: Are GPUs recognized by our DL frameworks
        run: |
-          pip install -r examples/pytorch/_tests_requirements.txt
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_examples_gpu_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports
-
-  run_torch_cuda_extensions_gpu:
-    if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
-    name: Torch CUDA extension tests
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: ${{ inputs.docker }}
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Update clone
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
-        run: |
-          python3 -m pip install -U datasets
-          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-
-      - name: Remove cached torch extensions
-        run: rm -rf /github/home/.cache/torch_extensions/
-
-      # To avoid unknown test failures
-      - name: Pre build DeepSpeed *again* (for daily CI)
-        if: ${{ contains(inputs.ci_event, 'Daily CI') }}
-        working-directory: ${{ inputs.working-directory-prefix }}/
-        run: |
-          python3 -m pip uninstall -y deepspeed
-          DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
-
-      # To avoid unknown test failures
-      - name: Pre build DeepSpeed *again* (for nightly & Past CI)
-        if: ${{ contains(inputs.ci_event, 'Nightly CI') || contains(inputs.ci_event, 'Past CI') }}
-        working-directory: ${{ inputs.working-directory-prefix }}/
-        run: |
-          python3 -m pip uninstall -y deepspeed
-          rm -rf DeepSpeed
-          git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build
-          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
-        run: pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
+          source .env/bin/activate
+          python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
+          python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"

      - name: Run all tests on GPU
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
+        env:
+          # Values are quoted so they stay strings under any YAML loader (`yes` is a boolean in YAML 1.1).
+          OMP_NUM_THREADS: "1"
+          RUN_SLOW: "yes"
        run: |
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_gpu tests

      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
-          path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
-
-  run_quantization_torch_gpu:
-    if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
-    name: " "
-    needs: setup
-    strategy:
-      max-parallel: 4
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-quantization-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
+        run: cat reports/tests_torch_gpu_failures_short.txt
+        
+      - name: Run examples tests on GPU
+        if: ${{ always() }}
+        env:
+          # Values are quoted so they stay strings under any YAML loader (`yes` is a boolean in YAML 1.1).
+          OMP_NUM_THREADS: "1"
+          RUN_SLOW: "yes"
        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Run quantization tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+          source .env/bin/activate
+          pip install -r examples/_tests_requirements.txt
+          python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_gpu examples

      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
+        run: cat reports/examples_torch_gpu_failures_short.txt

-  run_extract_warnings:
-    # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
-    if: ${{ always() && inputs.job == 'run_models_gpu' }}
-    name: Extract warnings in CI artifacts
-    runs-on: ubuntu-22.04
-    needs: [setup, run_models_gpu]
-    steps:
-      - name: Checkout transformers
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 2
-
-      - name: Install transformers
-        run: pip install transformers
-
-      - name: Show installed libraries and their versions
-        run: pip freeze
-
-      - name: Create output directory
-        run: mkdir warnings_in_ci
-
-      - uses: actions/download-artifact@v4
-        with:
-          path: warnings_in_ci
-
-      - name: Show artifacts
-        run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
-        working-directory: warnings_in_ci
-
-      - name: Extract warnings in CI artifacts
+      - name: Run all pipeline tests on GPU
+        if: ${{ always() }}
+        env:
+          # All values are quoted so they stay strings under any YAML loader (`yes` is a boolean in YAML 1.1).
+          TF_FORCE_GPU_ALLOW_GROWTH: "true"
+          OMP_NUM_THREADS: "1"
+          RUN_SLOW: "yes"
+          RUN_PIPELINE_TESTS: "yes"
        run: |
-          python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
-          echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests

-      - name: Upload artifact
+      - name: Failure short reports
        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        run: cat reports/tests_torch_pipeline_gpu_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
        with:
-          name: warnings_in_ci
-          path: warnings_in_ci/selected_warnings.json
+          name: run_all_tests_torch_gpu_test_reports
+          path: reports

-  send_results:
-    name: Slack Report
-    needs: [
-      setup,
-      run_models_gpu,
-      run_trainer_and_fsdp_gpu,
-      run_pipelines_torch_gpu,
-      run_examples_gpu,
-      run_torch_cuda_extensions_gpu,
-      run_quantization_torch_gpu,
-      run_extract_warnings
-    ]
-    if: ${{ always() }}
-    uses: ./.github/workflows/slack-report.yml
-    with:
-      job: ${{ inputs.job }}
-      # This would be `skipped` if `setup` is skipped.
-      setup_status: ${{ needs.setup.result }}
-      slack_report_channel: ${{ inputs.slack_report_channel }}
-      # This would be an empty string if `setup` is skipped.
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
-      ci_event: ${{ inputs.ci_event }}
-      report_repo_id: ${{ inputs.report_repo_id }}

-    secrets: inherit
+  run_all_tests_tf_gpu:  # TensorFlow test job for a self-hosted single-GPU runner (see the runs-on labels)
+    runs-on: [self-hosted, gpu, single-gpu]
+    steps:
+      - uses: actions/checkout@v2

-  check_new_failures:
-    if: ${{ always() && inputs.ci_event == 'Daily CI' && needs.send_results.result == 'success' }}
-    name: Check new failures
-    needs: send_results
-    uses: ./.github/workflows/check_failed_tests.yml
-    with:
-      docker: ${{ inputs.docker }}
-      start_sha: ${{ github.sha }}
-      job: ${{ inputs.job }}
-      slack_report_channel: ${{ inputs.slack_report_channel }}
-      ci_event: ${{ inputs.ci_event }}
-      report_repo_id: ${{ inputs.report_repo_id }}
+      - name: Loading cache.
+        uses: actions/cache@v2  # caches the .env virtualenv directory
+        id: cache
+        with:
+          path: .env
+          key: v1.1-slow_tests_tf_gpu-${{ hashFiles('setup.py') }}  # key changes whenever setup.py changes

-    secrets: inherit
+      - name: Python version  # diagnostics for the runner's default interpreter, before the venv is activated
+        run: |
+          which python
+          python --version
+          pip --version
+
+      - name: Current dir
+        run: pwd
+      - run: nvidia-smi  # print the NVIDIA driver/GPU status into the job log
+
+      - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+        if: steps.cache.outputs.cache-hit != 'true'  # skip venv creation when the cache step restored .env
+        run: |
+          python -m venv .env
+          source .env/bin/activate
+          which python
+          python --version
+          pip --version
+
+      - name: Install dependencies  # no `if:` guard, so this runs on cache hits as well
+        run: |
+          source .env/bin/activate
+          pip install --upgrade pip
+          pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
+          pip install git+https://github.com/huggingface/datasets
+          pip list
+
+      - name: Are GPUs recognized by our DL frameworks  # informational only: prints the result, asserts nothing
+        run: |
+          source .env/bin/activate
+          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
+          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
+
+      - name: Run all tests on GPU
+        env:  # RUN_SLOW presumably opts in to slow-marked tests; confirm in the test utilities
+          OMP_NUM_THREADS: 1
+          RUN_SLOW: yes
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_gpu tests
+          
+      - name: Failure short reports
+        if: ${{ always() }}  # still runs when the test step above failed
+        run: cat reports/tests_tf_gpu_failures_short.txt  # NOTE(review): presumably written by --make-reports=tests_tf_gpu; cat fails if the file is missing
+
+      - name: Run all pipeline tests on GPU  # second pytest pass, restricted to the is_pipeline_test marker (-m)
+        if: ${{ always() }}
+        env:
+          TF_FORCE_GPU_ALLOW_GROWTH: "true"
+          OMP_NUM_THREADS: 1
+          RUN_SLOW: yes
+          RUN_PIPELINE_TESTS: yes
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_gpu tests
+
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_tf_pipelines_gpu_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}  # upload reports regardless of earlier failures
+        uses: actions/upload-artifact@v2
+        with:
+          name: run_all_tests_tf_gpu_test_reports
+          path: reports  # the whole reports directory
+          
+  run_all_tests_torch_multi_gpu:  # PyTorch test job for a self-hosted multi-GPU runner (see the runs-on labels)
+    runs-on: [self-hosted, gpu, multi-gpu]
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Loading cache.
+        uses: actions/cache@v2  # caches the .env virtualenv directory
+        id: cache
+        with:
+          path: .env
+          key: v1.1-slow_tests_torch_multi_gpu-${{ hashFiles('setup.py') }}  # key changes whenever setup.py changes
+
+      - name: Python version  # diagnostics for the runner's default interpreter, before the venv is activated
+        run: |
+          which python
+          python --version
+          pip --version
+
+      - name: Current dir
+        run: pwd
+      - run: nvidia-smi  # print the NVIDIA driver/GPU status into the job log
+
+      - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+        if: steps.cache.outputs.cache-hit != 'true'  # skip venv creation when the cache step restored .env
+        run: |
+          python -m venv .env
+          source .env/bin/activate
+          which python
+          python --version
+          pip --version
+
+      - name: Install dependencies  # no `if:` guard, so this runs on cache hits as well
+        run: |
+          source .env/bin/activate
+          pip install --upgrade pip
+          pip install .[torch,sklearn,testing,onnxruntime,sentencepiece]
+          pip install git+https://github.com/huggingface/datasets
+          pip list
+
+      - name: Are GPUs recognized by our DL frameworks  # informational only: prints the result, asserts nothing
+        run: |
+          source .env/bin/activate
+          python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
+          python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
+
+      - name: Run all tests on multi-GPU
+        env:  # RUN_SLOW presumably opts in to slow-marked tests; confirm in the test utilities
+          OMP_NUM_THREADS: 1
+          RUN_SLOW: yes
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
+
+      - name: Failure short reports
+        if: ${{ always() }}  # still runs when the test step above failed
+        run: cat reports/tests_torch_multi_gpu_failures_short.txt
+
+      - name: Run examples tests on multi-GPU
+        if: ${{ always() }}  # matches the pipeline step below; without it a failure above skips the examples while the report step after it still runs
+        env:
+          OMP_NUM_THREADS: 1
+          RUN_SLOW: yes
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_examples_multi_gpu examples
+
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_torch_examples_multi_gpu_failures_short.txt
+
+      - name: Run all pipeline tests on multi-GPU  # extra pytest pass, restricted to the is_pipeline_test marker (-m)
+        if: ${{ always() }}
+        env:
+          TF_FORCE_GPU_ALLOW_GROWTH: "true"
+          OMP_NUM_THREADS: 1
+          RUN_SLOW: yes
+          RUN_PIPELINE_TESTS: yes
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
+
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_torch_pipeline_multi_gpu_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}  # upload reports regardless of earlier failures
+        uses: actions/upload-artifact@v2
+        with:
+          name: run_all_tests_torch_multi_gpu_test_reports
+          path: reports  # the whole reports directory
+
+  run_all_tests_tf_multi_gpu:  # TensorFlow test job for a self-hosted multi-GPU runner (see the runs-on labels)
+    runs-on: [self-hosted, gpu, multi-gpu]
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Loading cache.
+        uses: actions/cache@v2  # caches the .env virtualenv directory
+        id: cache
+        with:
+          path: .env
+          key: v1.1-slow_tests_tf_multi_gpu-${{ hashFiles('setup.py') }}  # key changes whenever setup.py changes
+
+      - name: Python version  # diagnostics for the runner's default interpreter, before the venv is activated
+        run: |
+          which python
+          python --version
+          pip --version
+
+      - name: Current dir
+        run: pwd
+      - run: nvidia-smi  # print the NVIDIA driver/GPU status into the job log
+
+      - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+        if: steps.cache.outputs.cache-hit != 'true'  # skip venv creation when the cache step restored .env
+        run: |
+          python -m venv .env
+          source .env/bin/activate
+          which python
+          python --version
+          pip --version
+
+      - name: Install dependencies  # no `if:` guard, so this runs on cache hits as well
+        run: |
+          source .env/bin/activate
+          pip install --upgrade pip
+          pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
+          pip install git+https://github.com/huggingface/datasets
+          pip list
+
+      - name: Are GPUs recognized by our DL frameworks  # informational only: prints the result, asserts nothing
+        run: |
+          source .env/bin/activate
+          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
+          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
+
+      - name: Run all tests on multi-GPU
+        env:  # RUN_SLOW presumably opts in to slow-marked tests; confirm in the test utilities
+          OMP_NUM_THREADS: 1
+          RUN_SLOW: yes
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
+
+      - name: Failure short reports
+        if: ${{ always() }}  # still runs when the test step above failed
+        run: cat reports/tests_tf_multi_gpu_failures_short.txt  # NOTE(review): presumably written by --make-reports=tests_tf_multi_gpu; cat fails if the file is missing
+
+      - name: Run all pipeline tests on multi-GPU  # second pytest pass, restricted to the is_pipeline_test marker (-m)
+        if: ${{ always() }}
+        env:
+          TF_FORCE_GPU_ALLOW_GROWTH: "true"
+          OMP_NUM_THREADS: 1
+          RUN_SLOW: yes
+          RUN_PIPELINE_TESTS: yes
+        run: |
+          source .env/bin/activate
+          python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests
+          
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_tf_pipeline_multi_gpu_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}  # upload reports regardless of earlier failures
+        uses: actions/upload-artifact@v2
+        with:
+          name: run_all_tests_tf_multi_gpu_test_reports
+          path: reports  # the whole reports directory
+          
--- a/.github/workflows/slack-report.yml
+++ b/.github/workflows/slack-report.yml
@ -1,93 +0,0 @@
-name: CI slack report
-
-on:
-  workflow_call:
-    inputs:
-      job:
-        required: true
-        type: string
-      slack_report_channel:
-        required: true
-        type: string
-      setup_status:
-        required: true
-        type: string
-      folder_slices:
-        required: true
-        type: string
-      quantization_matrix:
-        required: true
-        type: string
-      ci_event:
-        required: true
-        type: string
-      report_repo_id:
-        required: true
-        type: string
-
-env:
-  TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
-
-jobs:
-  send_results:
-    name: Send results to webhook
-    runs-on: ubuntu-22.04
-    if: always()
-    steps:
-      - name: Preliminary job status
-        shell: bash
-        # For the meaning of these environment variables, see the job `Setup`
-        run: |
-          echo "Setup status: ${{ inputs.setup_status }}"
-
-      - uses: actions/checkout@v4
-      - uses: actions/download-artifact@v4
-
-      - name: Prepare some setup values
-        run: |
-          if [ -f setup_values/prev_workflow_run_id.txt ]; then
-            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
-          else
-            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
-          fi
-
-          if [ -f setup_values/other_workflow_run_id.txt ]; then
-            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
-          else
-            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
-          fi
-
-      - name: Send message to Slack
-        shell: bash
-        env:
-          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
-          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
-          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
-          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
-          SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          CI_EVENT: ${{ inputs.ci_event }}
-          CI_SHA: ${{ github.sha }}
-          CI_TEST_JOB: ${{ inputs.job }}
-          SETUP_STATUS: ${{ inputs.setup_status }}
-          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
-        # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
-        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
-        # For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
-        # empty string, and the called script still get one argument (which is the emtpy string).
-        run: |
-          pip install huggingface_hub
-          pip install slack_sdk
-          pip show slack_sdk
-          if [ "${{ inputs.quantization_matrix }}" != "" ]; then
-            python utils/notification_service.py "${{ inputs.quantization_matrix }}"
-          else
-            python utils/notification_service.py "${{ inputs.folder_slices }}"
-          fi          
-
-      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
-      - name: Failure table artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: ci_results_${{ inputs.job }}
-          path: ci_results_${{ inputs.job }}
--- a/.github/workflows/ssh-runner.yml
+++ b/.github/workflows/ssh-runner.yml
@ -1,113 +0,0 @@
-name: SSH into our runners
-
-on:
-  workflow_dispatch:
-    inputs:
-      runner_type:
-        description: 'Type of runner to test (a10 or t4)'
-        required: true
-      docker_image:
-        description: 'Name of the Docker image'
-        required: true
-      num_gpus:
-        description: 'Type of the number of gpus to use (`single` or `multi`)'
-        required: true
-
-env:
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  CUDA_VISIBLE_DEVICES: 0,1
-
-jobs:
-  get_runner:
-    name: "Get runner to use"
-    runs-on: ubuntu-22.04
-    outputs:
-      RUNNER: ${{ steps.set_runner.outputs.RUNNER }}
-    steps:
-      - name: Get runner to use
-        shell: bash
-        run: |
-          if [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
-            echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV
-          elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
-            echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
-          elif [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
-            echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV
-          elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
-            echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV
-          else
-            echo "RUNNER=" >> $GITHUB_ENV
-          fi
-
-      - name: Set runner to use
-        id: set_runner
-        run: |
-          echo ${{ env.RUNNER }}
-          echo "RUNNER=${{ env.RUNNER }}" >> $GITHUB_OUTPUT
-
-  ssh_runner:
-    name: "SSH"
-    needs: get_runner
-    runs-on:
-      group: ${{ needs.get_runner.outputs.RUNNER }}
-    container:
-      image: ${{ github.event.inputs.docker_image }}
-      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-
-    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: |
-          git fetch && git checkout ${{ github.sha }}
-
-      - name: Cleanup
-        working-directory: /transformers
-        run: |
-          rm -rf tests/__pycache__
-          rm -rf tests/models/__pycache__
-          rm -rf reports
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Store Slack infos
-        #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step
-        shell: bash
-        run: |
-          echo "${{ github.actor }}"
-          github_actor=${{ github.actor }}
-          github_actor=${github_actor/'-'/'_'}
-          echo "$github_actor"
-          echo "github_actor=$github_actor" >> $GITHUB_ENV
-
-      - name: Store Slack infos
-        #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step
-        shell: bash
-        run: |
-          echo "${{ env.github_actor }}"
-          if [ "${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" != "" ]; then
-            echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" >> $GITHUB_ENV
-          else
-            echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV
-          fi
-
-      - name: Tailscale # In order to be able to SSH when a test fails
-        uses: huggingface/tailscale-action@main
-        with:
-          authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
-          slackChannel: ${{ env.SLACKCHANNEL }}
-          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-          waitForSSH: true
-          sshTimeout: 15m
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@ -1,29 +0,0 @@
-name: Stale Bot
-
-on:
-  schedule:
-    - cron: "0 8 * * *"
-
-jobs:
-  close_stale_issues:
-    name: Close Stale Issues
-    if: github.repository == 'huggingface/transformers'
-    runs-on: ubuntu-22.04
-    permissions:
-      issues: write
-    env:
-      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-    steps:
-    - uses: actions/checkout@v4
-
-    - name: Setup Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: 3.8
-
-    - name: Install requirements
-      run: |
-        pip install PyGithub
-    - name: Close stale issues
-      run: |
-        python scripts/stale.py
--- a/.github/workflows/trufflehog.yml
+++ b/.github/workflows/trufflehog.yml
@ -1,20 +0,0 @@
-on:
-  push:
-
-name: Secret Leaks
-
-permissions:
-  contents: read
-
-jobs:
-  trufflehog:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Secret Scanning
-        uses: trufflesecurity/trufflehog@main
-        with:
-          extra_args: --results=verified,unknown
--- a/.github/workflows/update_metdata.yml
+++ b/.github/workflows/update_metdata.yml
@ -1,27 +0,0 @@
-name: Update Transformers metadata
-
-on:
-  push:
-    branches:
-      - main
-      - update_transformers_metadata*
-
-jobs:
-  build_and_package:
-    runs-on: ubuntu-22.04
-    defaults:
-      run:
-        shell: bash -l {0}
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Setup environment
-        run: |
-          pip install --upgrade pip
-          pip install datasets pandas
-          pip install .[torch,tf,flax]
-
-      - name: Update metadata
-        run: |
-          python utils/update_metadata.py --token ${{ secrets.LYSANDRE_HF_TOKEN }} --commit_sha ${{ github.sha }}
--- a/.github/workflows/upload_pr_documentation.yml
+++ b/.github/workflows/upload_pr_documentation.yml
@ -1,16 +0,0 @@
-name: Upload PR Documentation
-
-on:
-  workflow_run:
-    workflows: ["Build PR Documentation"]
-    types:
-      - completed
-
-jobs:
-  build:
-    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
-    with:
-      package_name: transformers
-    secrets:
-      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
-      comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@ -9,7 +9,8 @@ __pycache__/
 *.so

 # tests and logs
-tests/fixtures/cached_*_text.txt
+tests/fixtures/*
+!tests/fixtures/sample_text_no_unicode.txt
 logs/
 lightning_logs/
 lang_code_data/
@ -160,10 +161,4 @@ tags
 .pre-commit*

 # .lock
-*.lock
-
-# DS_Store (MacOS)
-.DS_Store
-
-# ruff
-.ruff_cache
+*.lock
--- a/CITATION.cff
+++ b/CITATION.cff
@ -1,82 +0,0 @@
-cff-version: "1.2.0"
-date-released: 2020-10
-message: "If you use this software, please cite it using these metadata."
-title: "Transformers: State-of-the-Art Natural Language Processing"
-url: "https://github.com/huggingface/transformers"
-authors: 
-  - family-names: Wolf
-    given-names: Thomas
-  - family-names: Debut
-    given-names: Lysandre
-  - family-names: Sanh
-    given-names: Victor
-  - family-names: Chaumond
-    given-names: Julien
-  - family-names: Delangue
-    given-names: Clement
-  - family-names: Moi
-    given-names: Anthony
-  - family-names: Cistac
-    given-names: Perric
-  - family-names: Ma
-    given-names: Clara
-  - family-names: Jernite
-    given-names: Yacine
-  - family-names: Plu
-    given-names: Julien
-  - family-names: Xu
-    given-names: Canwen
-  - family-names: "Le Scao"
-    given-names: Teven
-  - family-names: Gugger
-    given-names: Sylvain
-  - family-names: Drame
-    given-names: Mariama
-  - family-names: Lhoest
-    given-names: Quentin
-  - family-names: Rush
-    given-names: "Alexander M."
-preferred-citation:
-  type: conference-paper
-  authors:
-  - family-names: Wolf
-    given-names: Thomas
-  - family-names: Debut
-    given-names: Lysandre
-  - family-names: Sanh
-    given-names: Victor
-  - family-names: Chaumond
-    given-names: Julien
-  - family-names: Delangue
-    given-names: Clement
-  - family-names: Moi
-    given-names: Anthony
-  - family-names: Cistac
-    given-names: Perric
-  - family-names: Ma
-    given-names: Clara
-  - family-names: Jernite
-    given-names: Yacine
-  - family-names: Plu
-    given-names: Julien
-  - family-names: Xu
-    given-names: Canwen
-  - family-names: "Le Scao"
-    given-names: Teven
-  - family-names: Gugger
-    given-names: Sylvain
-  - family-names: Drame
-    given-names: Mariama
-  - family-names: Lhoest
-    given-names: Quentin
-  - family-names: Rush
-    given-names: "Alexander M."
-  booktitle: "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations"
-  month: 10
-  start: 38
-  end: 45
-  title: "Transformers: State-of-the-Art Natural Language Processing"
-  year: 2020
-  publisher: "Association for Computational Linguistics"
-  url: "https://www.aclweb.org/anthology/2020.emnlp-demos.6"
-  address: "Online"
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@ -7,8 +7,8 @@ We as members, contributors, and leaders pledge to make participation in our
 community a harassment-free experience for everyone, regardless of age, body
 size, visible or invisible disability, ethnicity, sex characteristics, gender
 identity and expression, level of experience, education, socio-economic status,
-nationality, personal appearance, race, caste, color, religion, or sexual
-identity and orientation.
+nationality, personal appearance, race, religion, or sexual identity
+and orientation.

 We pledge to act and interact in ways that contribute to an open, welcoming,
 diverse, inclusive, and healthy community.
@ -23,17 +23,17 @@ community include:
 * Giving and gracefully accepting constructive feedback
 * Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
-* Focusing on what is best not just for us as individuals, but for the overall
-  community
+* Focusing on what is best not just for us as individuals, but for the
+  overall community

 Examples of unacceptable behavior include:

-* The use of sexualized language or imagery, and sexual attention or advances of
-  any kind
+* The use of sexualized language or imagery, and sexual attention or
+  advances of any kind
 * Trolling, insulting or derogatory comments, and personal or political attacks
 * Public or private harassment
-* Publishing others' private information, such as a physical or email address,
-  without their explicit permission
+* Publishing others' private information, such as a physical or email
+  address, without their explicit permission
 * Other conduct which could reasonably be considered inappropriate in a
  professional setting

@ -83,15 +83,15 @@ behavior was inappropriate. A public apology may be requested.

 ### 2. Warning

-**Community Impact**: A violation through a single incident or series of
-actions.
+**Community Impact**: A violation through a single incident or series
+of actions.

 **Consequence**: A warning with consequences for continued behavior. No
 interaction with the people involved, including unsolicited interaction with
 those enforcing the Code of Conduct, for a specified period of time. This
 includes avoiding interactions in community spaces as well as external channels
-like social media. Violating these terms may lead to a temporary or permanent
-ban.
+like social media. Violating these terms may lead to a temporary or
+permanent ban.

 ### 3. Temporary Ban

@ -107,27 +107,23 @@ Violating these terms may lead to a permanent ban.
 ### 4. Permanent Ban

 **Community Impact**: Demonstrating a pattern of violation of community
-standards, including sustained inappropriate behavior, harassment of an
+standards, including sustained inappropriate behavior, harassment of an
 individual, or aggression toward or disparagement of classes of individuals.

-**Consequence**: A permanent ban from any sort of public interaction within the
-community.
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.

 ## Attribution

 This Code of Conduct is adapted from the [Contributor Covenant][homepage],
-version 2.1, available at
-[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+version 2.0, available at
+https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.

-Community Impact Guidelines were inspired by
-[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
-
-For answers to common questions about this code of conduct, see the FAQ at
-[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
-[https://www.contributor-covenant.org/translations][translations].
+Community Impact Guidelines were inspired by [Mozilla's code of conduct
+enforcement ladder](https://github.com/mozilla/diversity).

 [homepage]: https://www.contributor-covenant.org
-[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
-[Mozilla CoC]: https://github.com/mozilla/diversity
-[FAQ]: https://www.contributor-covenant.org/faq
-[translations]: https://www.contributor-covenant.org/translations
+
+For answers to common questions about this code of conduct, see the FAQ at
+https://www.contributor-covenant.org/faq. Translations are available at
+https://www.contributor-covenant.org/translations.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -14,337 +14,296 @@ See the License for the specific language governing permissions and
 limitations under the License.
 -->

-# Contribute to 🤗 Transformers
+# How to contribute to transformers?

 Everyone is welcome to contribute, and we value everybody's contribution. Code
-contributions are not the only way to help the community. Answering questions, helping
-others, and improving the documentation are also immensely valuable.
+is thus not the only way to help the community. Answering questions, helping
+others, reaching out and improving the documentation are immensely valuable to
+the community.

-It also helps us if you spread the word! Reference the library in blog posts
-about the awesome projects it made possible, shout out on Twitter every time it has
-helped you, or simply ⭐️ the repository to say thank you.
+It also helps us if you spread the word: reference the library from blog posts
+on the awesome projects it made possible, shout out on Twitter every time it has
+helped you, or simply star the repo to say "thank you".

-However you choose to contribute, please be mindful and respect our
-[code of conduct](https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md).
+Whichever way you choose to contribute, please be mindful to respect our
+[code of conduct](https://github.com/huggingface/transformers/blob/master/CODE_OF_CONDUCT.md).

-**This guide was heavily inspired by the awesome [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md).**
+## You can contribute in so many ways!

-## Ways to contribute
+There are 4 ways you can contribute to transformers:
+* Fixing outstanding issues with the existing code;
+* Implementing new models;
+* Contributing to the examples or to the documentation;
+* Submitting issues related to bugs or desired new features.

-There are several ways you can contribute to 🤗 Transformers:
+*All are equally valuable to the community.*

-* Fix outstanding issues with the existing code.
-* Submit issues related to bugs or desired new features.
-* Implement new models.
-* Contribute to the examples or to the documentation.
+## Submitting a new issue or feature request

-If you don't know where to start, there is a special [Good First
-Issue](https://github.com/huggingface/transformers/contribute) listing. It will give you a list of
-open issues that are beginner-friendly and help you start contributing to open-source. The best way to do that is to open a Pull Request and link it to the issue that you'd like to work on. We try to give priority to opened PRs as we can easily track the progress of the fix, and if the contributor does not have time anymore, someone else can take the PR over.
-
-For something slightly more challenging, you can also take a look at the [Good Second Issue](https://github.com/huggingface/transformers/labels/Good%20Second%20Issue) list. In general though, if you feel like you know what you're doing, go for it and we'll help you get there! 🚀
-
-> All contributions are equally valuable to the community. 🥰
-
-## Fixing outstanding issues
-
-If you notice an issue with the existing code and have a fix in mind, feel free to [start contributing](#create-a-pull-request) and open a Pull Request!
-
-## Submitting a bug-related issue or feature request
-
-Do your best to follow these guidelines when submitting a bug-related issue or a feature
+Do your best to follow these guidelines when submitting an issue or a feature
 request. It will make it easier for us to come back to you quickly and with good
 feedback.

 ### Did you find a bug?

-The 🤗 Transformers library is robust and reliable thanks to users who report the problems they encounter.
+The `transformers` library is robust and reliable thanks to the users who notify us of
+the problems they encounter. So thank you for reporting an issue.

-Before you report an issue, we would really appreciate it if you could **make sure the bug was not
-already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) or on our [discord](https://discord.com/invite/hugging-face-879548962464493619) first. This helps us respond quicker to fixing issues related to the library versus general questions.
+First, we would really appreciate it if you could **make sure the bug was not
+already reported** (use the search bar on GitHub under Issues).

-> [!TIP]
-> We have a [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat), and we highly encourage you to ask all your questions there. There is always a chance your bug can be fixed with a simple flag 👾🔫
+Did not find it? :( So we can act quickly on it, please follow these steps:

-Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it:
-
-* Your **OS type and version** and **Python**, **PyTorch** and
-  **TensorFlow** versions when applicable.
+* Include your **OS type and version**, the versions of **Python**, **PyTorch** and
+  **TensorFlow** when applicable;
 * A short, self-contained, code snippet that allows us to reproduce the bug in
-  less than 30s.
-* The *full* traceback if an exception is raised.
-* Attach any other additional information, like screenshots, you think may help.
+  less than 30s;
+* Provide the *full* traceback if an exception is raised.

-To get the OS and software versions automatically, run the following command:
+To get the OS and software versions automatically, you can run the following command:

 ```bash
-transformers env
+transformers-cli env
 ```

-You can also run the same command from the root of the repository:
+or from the root of the repository the following command:

 ```bash
 python src/transformers/commands/transformers_cli.py env
 ```

-### Do you want a new feature?

-If there is a new feature you'd like to see in 🤗 Transformers, please open an issue and describe:
+### Do you want to implement a new model?

-1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it a feature related to something you need for a project? Is it something you worked on and think it could benefit the community?
+Awesome! Please provide the following information:

-   Whatever it is, we'd love to hear about it!
-
-2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better we'll be able to help you.
-3. Provide a *code snippet* that demonstrates the features usage.
-4. If the feature is related to a paper, please include a link.
-
-If your issue is well written we're already 80% of the way there by the time you create it.
-
-We have added [templates](https://github.com/huggingface/transformers/tree/main/templates) to help you get started with your issue.
-
-## Do you want to implement a new model?
-
-New models are constantly released and if you want to implement a new model, please provide the following information:
-
-* A short description of the model and a link to the paper.
-* Link to the implementation if it is open-sourced.
+* Short description of the model and link to the paper;
+* Link to the implementation if it is open-source;
 * Link to the model weights if they are available.

-If you are willing to contribute the model yourself, let us know so we can help you add it to 🤗 Transformers!
+If you are willing to contribute the model yourself, let us know so we can best
+guide you.

-We have a technical guide for [how to add a model to 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model).
+We have added a **detailed guide and templates** to guide you in the process of adding a new model. You can find them
+in the [`templates`](https://github.com/huggingface/transformers/tree/master/templates) folder.

-## Do you want to add documentation?
+### Do you want a new feature (that is not a model)?

-We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be happy to make the changes or help you make a contribution if you're interested!
+A world-class feature request addresses the following points:

-For more details about how to generate, build, and write the documentation, take a look at the documentation [README](https://github.com/huggingface/transformers/tree/main/docs).
+1. Motivation first:
+  * Is it related to a problem/frustration with the library? If so, please explain
+    why. Providing a code snippet that demonstrates the problem is best.
+  * Is it related to something you would need for a project? We'd love to hear
+    about it!
+  * Is it something you worked on and think could benefit the community?
+    Awesome! Tell us what problem it solved for you.
+2. Write a *full paragraph* describing the feature;
+3. Provide a **code snippet** that demonstrates its future use;
+4. In case this is related to a paper, please attach a link;
+5. Attach any additional information (drawings, screenshots, etc.) you think may help.

-## Create a Pull Request
+If your issue is well written we're already 80% of the way there by the time you
+post it.

-Before writing any code, we strongly advise you to search through the existing PRs or
-issues to make sure nobody is already working on the same thing. If you are
+We have added **templates** to guide you in the process of adding a new example script for training or testing the
+models in the library. You can find them in the [`templates`](https://github.com/huggingface/transformers/tree/master/templates)
+folder.
+
+## Start contributing! (Pull Requests)
+
+Before writing code, we strongly advise you to search through the existing PRs or
+issues to make sure that nobody is already working on the same thing. If you are
 unsure, it is always a good idea to open an issue to get some feedback.

-You will need basic `git` proficiency to contribute to
-🤗 Transformers. While `git` is not the easiest tool to use, it has the greatest
-manual. Type `git --help` in a shell and enjoy! If you prefer books, [Pro
+You will need basic `git` proficiency to be able to contribute to
+`transformers`. `git` is not the easiest tool to use but it has the greatest
+manual. Type `git --help` in a shell and enjoy. If you prefer books, [Pro
 Git](https://git-scm.com/book/en/v2) is a very good reference.

-You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main/setup.py#L449)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:
+Follow these steps to start contributing:

 1. Fork the [repository](https://github.com/huggingface/transformers) by
-   clicking on the **[Fork](https://github.com/huggingface/transformers/fork)** button on the repository's page. This creates a copy of the code
+   clicking on the 'Fork' button on the repository's page. This creates a copy of the code
   under your GitHub user account.

 2. Clone your fork to your local disk, and add the base repository as a remote:

   ```bash
-   git clone git@github.com:<your Github handle>/transformers.git
-   cd transformers
-   git remote add upstream https://github.com/huggingface/transformers.git
+   $ git clone git@github.com:<your Github handle>/transformers.git
+   $ cd transformers
+   $ git remote add upstream https://github.com/huggingface/transformers.git
   ```

 3. Create a new branch to hold your development changes:

   ```bash
-   git checkout -b a-descriptive-name-for-my-changes
+   $ git checkout -b a-descriptive-name-for-my-changes
   ```

-   🚨 **Do not** work on the `main` branch!
+   **Do not** work on the `master` branch.

 4. Set up a development environment by running the following command in a virtual environment:

   ```bash
-   pip install -e ".[dev]"
+   $ pip install -e ".[dev]"
   ```

-   If 🤗 Transformers was already installed in the virtual environment, remove
+   (If transformers was already installed in the virtual environment, remove
   it with `pip uninstall transformers` before reinstalling it in editable
-   mode with the `-e` flag.
+   mode with the `-e` flag.)

-   Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
-   failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
-   (PyTorch, TensorFlow and/or Flax) then do:
+   To run the full test suite, you might need the additional dependency on `datasets` which requires a separate source
+   install:

   ```bash
-   pip install -e ".[quality]"
+   $ git clone https://github.com/huggingface/datasets
+   $ cd datasets
+   $ pip install -e .
   ```

-   which should be enough for most use cases.
+   If you have already cloned that repo, you might need to `git pull` to get the most recent changes in the `datasets`
+   library.

-5. Develop the features in your branch.
+5. Develop the features on your branch.

-   As you work on your code, you should make sure the test suite
-   passes. Run the tests impacted by your changes like this:
+   As you work on the features, you should make sure that the test suite
+   passes:

   ```bash
-   pytest tests/<TEST_TO_RUN>.py
+   $ make test
   ```

-   For more information about tests, check out the
-   [Testing](https://huggingface.co/docs/transformers/testing) guide.
-
-   🤗 Transformers relies on `black` and `ruff` to format its source code
-   consistently. After you make changes, apply automatic style corrections and code verifications
-   that can't be automated in one go with:
+   Note that this command uses the `-n auto` pytest flag, so it will start as many parallel `pytest` processes as your computer has CPU cores. If you have many cores, a few GPUs, and not a great amount of RAM, this is likely to overload your computer. In that case, you may want to run the test suite with this command instead:

   ```bash
-   make fixup
+   $ python -m pytest -n 3 --dist=loadfile -s -v ./tests/
+   ```
+
+   Adjust the value of `-n` to fit the load your hardware can support.
+
+   `transformers` relies on `black` and `isort` to format its source code
+   consistently. After you make changes, format them with:
+
+   ```bash
+   $ make style
+   ```
+
+   `transformers` also uses `flake8` and a few custom scripts to check for coding mistakes. Quality
+   control runs in CI, however you can also run the same checks with:
+
+   ```bash
+   $ make quality
+   ```
+   You can do the automatic style corrections and code verifications that can't be automated in one go:
+
+   ```bash
+   $ make fixup
   ```

   This target is also optimized to only work with files modified by the PR you're working on.

-   If you prefer to run the checks one after the other, the following command applies the
-   style corrections:
+   If you're modifying documents under `docs/source`, make sure to validate that
+   they can still be built. This check also runs in CI. To run a local check
+   make sure you have installed the documentation builder requirements, by
+   running `pip install .[tf,torch,docs]` once from the root of this repository
+   and then run:

   ```bash
-   make style
+   $ make docs
   ```

-   🤗 Transformers also uses `ruff` and a few custom scripts to check for coding mistakes. Quality
-   controls are run by the CI, but you can run the same checks with:
+   Once you're happy with your changes, add changed files using `git add` and
+   make a commit with `git commit` to record your changes locally:

   ```bash
-   make quality
+   $ git add modified_file.py
+   $ git commit
   ```

-   Finally, we have a lot of scripts to make sure we don't forget to update
-   some files when adding a new model. You can run these scripts with:
+   Please write [good commit
+   messages](https://chris.beams.io/posts/git-commit/).
+
+   It is a good idea to sync your copy of the code with the original
+   repository regularly. This way you can quickly account for changes:

   ```bash
-   make repo-consistency
+   $ git fetch upstream
+   $ git rebase upstream/master
   ```

-   To learn more about those checks and how to fix any issues with them, check out the
-   [Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide.
-
-   If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check
-   make sure you install the [documentation builder](https://github.com/huggingface/doc-builder).
+   Push the changes to your account using:

   ```bash
-   pip install hf-doc-builder
+   $ git push -u origin a-descriptive-name-for-my-changes
   ```

-   Run the following command from the root of the repository:
+6. Once you are satisfied (**and the checklist below is happy too**), go to the
+   webpage of your fork on GitHub. Click on 'Pull request' to send your changes
+   to the project maintainers for review.

-   ```bash
-   doc-builder build transformers docs/source/en --build_dir ~/tmp/test-build
-   ```
-
-   This will build the documentation in the `~/tmp/test-build` folder where you can inspect the generated
-   Markdown files with your favorite editor. You can also preview the docs on GitHub when you open a pull request.
-
-   Once you're happy with your changes, add the changed files with `git add` and
-   record your changes locally with `git commit`:
-
-   ```bash
-   git add modified_file.py
-   git commit
-   ```
-
-   Please remember to write [good commit
-   messages](https://chris.beams.io/posts/git-commit/) to clearly communicate the changes you made!
-
-   To keep your copy of the code up to date with the original
-   repository, rebase your branch on `upstream/branch` *before* you open a pull request or if requested by a maintainer:
-
-   ```bash
-   git fetch upstream
-   git rebase upstream/main
-   ```
-
-   Push your changes to your branch:
-
-   ```bash
-   git push -u origin a-descriptive-name-for-my-changes
-   ```
-
-   If you've already opened a pull request, you'll need to force push with the `--force` flag. Otherwise, if the pull request hasn't been opened yet, you can just push your changes normally.
-
-6. Now you can go to your fork of the repository on GitHub and click on **Pull Request** to open a pull request. Make sure you tick off all the boxes on our [checklist](#pull-request-checklist) below. When you're ready, you can send your changes to the project maintainers for review.
-
-7. It's ok if maintainers request changes, it happens to our core contributors
-   too! So everyone can see the changes in the pull request, work in your local
+7. It's ok if maintainers ask you for changes. It happens to core contributors
+   too! So everyone can see the changes in the Pull request, work in your local
   branch and push the changes to your fork. They will automatically appear in
   the pull request.

-### Pull request checklist

-☐ The pull request title should summarize your contribution.<br>
-☐ If your pull request addresses an issue, please mention the issue number in the pull
-request description to make sure they are linked (and people viewing the issue know you
-are working on it).<br>
-☐ To indicate a work in progress please prefix the title with `[WIP]`. These are
-useful to avoid duplicated work, and to differentiate it from PRs ready to be merged.<br>
-☐ Make sure existing tests pass.<br>
-☐ If adding a new feature, also add tests for it.<br>
-   - If you are adding a new model, make sure you use
-     `ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)` to trigger the common tests.
+### Checklist
+
+1. The title of your pull request should be a summary of its contribution;
+2. If your pull request addresses an issue, please mention the issue number in
+   the pull request description to make sure they are linked (and people
+   consulting the issue know you are working on it);
+3. To indicate a work in progress please prefix the title with `[WIP]`. These
+   are useful to avoid duplicated work, and to differentiate it from PRs ready
+   to be merged;
+4. Make sure existing tests pass;
+5. Add high-coverage tests. No quality testing = no merge.
+   - If you are adding a new model, make sure that you use
+     `ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)`, which triggers the common tests.
   - If you are adding new `@slow` tests, make sure they pass using
-     `RUN_SLOW=1 python -m pytest tests/models/my_new_model/test_my_new_model.py`.
-   - If you are adding a new tokenizer, write tests and make sure
-     `RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py` passes.
-   - CircleCI does not run the slow tests, but GitHub Actions does every night!<br>
-
-☐ All public methods must have informative docstrings (see
-[`modeling_bert.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py)
-for an example).<br>
-☐ Due to the rapidly growing repository, don't add any images, videos and other
-non-text files that'll significantly weigh down the repository. Instead, use a Hub
-repository such as [`hf-internal-testing`](https://huggingface.co/hf-internal-testing)
-to host these files and reference them by URL. We recommend placing documentation
-related images in the following repository:
-[huggingface/documentation-images](https://huggingface.co/datasets/huggingface/documentation-images).
-You can open a PR on this dataset repository and ask a Hugging Face member to merge it.
-
-For more information about the checks run on a pull request, take a look at our [Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide.
+     `RUN_SLOW=1 python -m pytest tests/test_my_new_model.py`.
+   - If you are adding a new tokenizer, write tests, and make sure
+     `RUN_SLOW=1 python -m pytest tests/test_tokenization_{your_model_name}.py` passes.
+   CircleCI does not run the slow tests, but GitHub Actions does every night!
+6. All public methods must have informative docstrings that work nicely with sphinx. See `modeling_ctrl.py` for an
+   example.

 ### Tests

 An extensive test suite is included to test the library behavior and several examples. Library tests can be found in
-the [tests](https://github.com/huggingface/transformers/tree/main/tests) folder and examples tests in the
-[examples](https://github.com/huggingface/transformers/tree/main/examples) folder.
+the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the
+[examples folder](https://github.com/huggingface/transformers/tree/master/examples).

 We like `pytest` and `pytest-xdist` because it's faster. From the root of the
-repository, specify a *path to a subfolder or a test file* to run the test:
+repository, here's how to run tests with `pytest` for the library:

 ```bash
-python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_new_model
+$ python -m pytest -n auto --dist=loadfile -s -v ./tests/
 ```

-Similarly, for the `examples` directory, specify a *path to a subfolder or test file* to run the test. For example, the following command tests the text classification subfolder in the PyTorch `examples` directory:
+and for the examples:

 ```bash
-pip install -r examples/xxx/requirements.txt  # only needed the first time
-python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification
+$ pip install -r examples/requirements.txt  # only needed the first time
+$ python -m pytest -n auto --dist=loadfile -s -v ./examples/
 ```
+In fact, that's how `make test` and `make test-examples` are implemented (sans the `pip install` line)!

-In fact, this is actually how our `make test` and `make test-examples` commands are implemented (not including the `pip install`)!
-
-You can also specify a smaller set of tests in order to test only the feature
+You can specify a smaller set of tests in order to test only the feature
 you're working on.

-By default, slow tests are skipped but you can set the `RUN_SLOW` environment variable to
-`yes` to run them. This will download many gigabytes of models so make sure you
-have enough disk space, a good internet connection or a lot of patience!
-
-<Tip warning={true}>
-
-Remember to specify a *path to a subfolder or a test file* to run the test. Otherwise, you'll run all the tests in the `tests` or `examples` folder, which will take a very long time!
-
-</Tip>
+By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to
+`yes` to run them. This will download many gigabytes of models — make sure you
+have enough disk space and a good Internet connection, or a lot of patience!

 ```bash
-RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_new_model
-RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification
+$ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/
+$ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/
 ```

-Like the slow tests, there are other environment variables available which are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
-
-More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).
+Likewise, set the `RUN_CUSTOM_TOKENIZERS` environment variable to `yes` to run
+tests for custom tokenizers, which don't run by default either.

 🤗 Transformers uses `pytest` as a test runner only. It doesn't use any
 `pytest`-specific features in the test suite itself.
@ -353,43 +312,44 @@ This means `unittest` is fully supported. Here's how to run tests with
 `unittest`:

 ```bash
-python -m unittest discover -s tests -t . -v
-python -m unittest discover -s examples -t examples -v
+$ python -m unittest discover -s tests -t . -v
+$ python -m unittest discover -s examples -t examples -v
 ```

+
 ### Style guide

-For documentation strings, 🤗 Transformers follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html).
-Check our [documentation writing guide](https://github.com/huggingface/transformers/tree/main/docs#writing-documentation---specification)
+For documentation strings, `transformers` follows the [google style](https://google.github.io/styleguide/pyguide.html).
+Check our [documentation writing guide](https://github.com/huggingface/transformers/tree/master/docs#writing-documentation---specification)
 for more information.

+#### This guide was heavily inspired by the awesome [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/master/CONTRIBUTING.md)
+
+
 ### Develop on Windows

-On Windows (unless you're working in [Windows Subsystem for Linux](https://learn.microsoft.com/en-us/windows/wsl/) or WSL), you need to configure git to transform Windows `CRLF` line endings to Linux `LF` line endings:
+On Windows, you need to configure git to transform Windows `CRLF` line endings to Linux `LF` line endings:

-```bash
-git config core.autocrlf input
-```
+`git config core.autocrlf input`

-One way to run the `make` command on Windows is with MSYS2:
+One way to run the `make` command on Windows is to go through MSYS2:

-1. [Download MSYS2](https://www.msys2.org/), and we assume it's installed in `C:\msys64`.
-2. Open the command line `C:\msys64\msys2.exe` (it should be available from the **Start** menu).
-3. Run in the shell: `pacman -Syu` and install `make` with `pacman -S make`.
+1. [Download MSYS2](https://www.msys2.org/); we assume it is installed in `C:\msys64`.
+2. Open the command line `C:\msys64\msys2.exe` (it should be available from the Start menu).
+3. Run in the shell: `pacman -Syu` and install `make` with `pacman -S make`.
 4. Add `C:\msys64\usr\bin` to your PATH environment variable.

-You can now use `make` from any terminal (PowerShell, cmd.exe, etc.)! 🎉
+You can now use `make` from any terminal (PowerShell, cmd.exe, etc.) 🎉

-### Sync a forked repository with upstream main (the Hugging Face repository)
+### Syncing forked master with upstream (HuggingFace) master

-When updating the main branch of a forked repository, please follow these steps to avoid pinging the upstream repository which adds reference notes to each upstream PR, and sends unnecessary notifications to the developers involved in these PRs.
-
-1. When possible, avoid syncing with the upstream using a branch and PR on the forked repository. Instead, merge directly into the forked main.
+To avoid pinging the upstream repository, which adds reference notes to each upstream PR and sends unnecessary notifications to the developers involved in these PRs,
+when syncing the master branch of a forked repository, please follow these steps:
+1. When possible, avoid syncing with the upstream using a branch and PR on the forked repository. Instead, merge directly into the forked master.
 2. If a PR is absolutely necessary, use the following steps after checking out your branch:
-
-   ```bash
-   git checkout -b your-branch-for-syncing
-   git pull --squash --no-commit upstream main
-   git commit -m '<your message without GitHub references>'
-   git push --set-upstream origin your-branch-for-syncing
-   ```
+```
+$ git checkout -b your-branch-for-syncing
+$ git pull --squash --no-commit upstream master
+$ git commit -m '<your message without GitHub references>'
+$ git push --set-upstream origin your-branch-for-syncing
+```
--- a/ISSUES.md
+++ b/ISSUES.md
@ -18,7 +18,7 @@ limitations under the License.

 This is an Open Source Project so please be mindful that like in any other project of this kind there is no obligation to answer all requests for help.

-However, we want to encourage you to ask for help whenever you think it's needed! We are happy about every question we get because it allows us to better understand your needs, possible misunderstandings, and most importantly a way for you to help us make this library better. That being said, this document's main purpose is to provide guidelines at how you can formulate your requests to increase your chances to be understood and to get support.
+However, we want to encourage you to ask for help whenever you think it's needed! We are happy about every question we get because it allows us to better understand your needs, possible misunderstandings, and most importantly a way for you to help us make this library better. That being said, this document's main purpose is to provide guidelines at how you can formulate your requests to increase your chances to be understood and to get support.

 There are two main venues to receive support: [the forums](https://discuss.huggingface.co/) and [the GitHub issues](https://github.com/huggingface/transformers/issues).

@ -26,7 +26,7 @@ There are two main venues to receive support: [the forums](https://discuss.huggi

 [The user forums](https://discuss.huggingface.co/) are supported by the wide community of the library users and backed up by developers when needed.

-If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystallized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).
+If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystallized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).

 In particular all "Please explain" questions or objectively very user-specific feature requests belong to the forums. Here are some example of such questions:

@ -71,8 +71,8 @@ You are not required to read the following guidelines before opening an issue. H
     File "/transformers/src/transformers/__init__.py", line 34, in <module>
       from . import dependency_versions_check
     File "/transformers/src/transformers/dependency_versions_check.py", line 34, in <module>
-       from .utils import is_tokenizers_available
-     File "/transformers/src/transformers/utils/import_utils.py", line 40, in <module>
+       from .file_utils import is_tokenizers_available
+     File "/transformers/src/transformers/file_utils.py", line 40, in <module>
       from tqdm.auto import tqdm
    ModuleNotFoundError: No module named 'tqdm.auto'
    ```
@ -124,8 +124,8 @@ You are not required to read the following guidelines before opening an issue. H
     File "/transformers/src/transformers/__init__.py", line 34, in <module>
       from . import dependency_versions_check
     File "/transformers/src/transformers/dependency_versions_check.py", line 34, in <module>
-       from .utils import is_tokenizers_available
-     File "/transformers/src/transformers/utils/import_utils.py", line 40, in <module>
+       from .file_utils import is_tokenizers_available
+     File "/transformers/src/transformers/file_utils.py", line 40, in <module>
       from tqdm.auto import tqdm
   ModuleNotFoundError: No module named 'tqdm.auto'
   ```
@ -152,13 +152,13 @@ You are not required to read the following guidelines before opening an issue. H

   ```bash
    cd examples/seq2seq
-    torchrun --nproc_per_node=2 ./finetune_trainer.py \
+    python -m torch.distributed.launch --nproc_per_node=2 ./finetune_trainer.py \
    --model_name_or_path sshleifer/distill-mbart-en-ro-12-4 --data_dir wmt_en_ro \
    --output_dir output_dir --overwrite_output_dir \
    --do_train --n_train 500 --num_train_epochs 1 \
    --per_device_train_batch_size 1  --freeze_embeds \
    --src_lang en_XX --tgt_lang ro_RO --task translation \
-    --fp16
+    --fp16 --sharded_ddp
   ```

   If you don't break it up, one has to scroll horizontally which often makes it quite difficult to quickly see what's happening.
@ -205,9 +205,7 @@ You are not required to read the following guidelines before opening an issue. H

   If you really tried to make a short reproducible code but couldn't figure it out, it might be that having a traceback will give the developer enough information to know what's going on. But if it is not enough and we can't reproduce the problem, we can't really solve it.

-   Do not despair if you can't figure it out from the beginning, just share what you can and perhaps someone else will be able to help you at the forums.
-
-   If your setup involves any custom datasets, the best way to help us reproduce the problem is to create a [Google Colab notebook](https://colab.research.google.com/) that demonstrates the issue and once you verify that the issue still exists, include a link to that notebook in the Issue. Just make sure that you don't copy and paste the location bar url of the open notebook - as this is private and we won't be able to open it. Instead, you need to click on `Share` in the right upper corner of the notebook, select `Get Link` and then copy and paste the public link it will give to you.
+   Do not despair if you can't figure it out from the beginning, just share what you can and perhaps someone else will be able to help you at the forums.

 7. If you forked off some of this project's code or example applications, please, do not ask us to go into your code repository and figure out what you may have done. The code is already very complex and unless there is an easy way to do a diff and it's a small diff, it won't be possible to find someone with time on their hands to make a lengthy investigation. Albeit, you might find someone at the forums who will be generous to do this for you.

@ -263,9 +261,9 @@ You are not required to read the following guidelines before opening an issue. H
    But if you're replying to a comment that happened some comments back it's always a good practice to quote just the relevant lines you're replying to. The `>` is used for quoting, or you can always use the menu to do so. For example your editor box will look like:

    ```
-    > How big is your GPU cluster?
+    > How big is your gpu cluster?

-    Our cluster is made of 256 GPUs.
+    Our cluster is made of 256 gpus.
    ```

    If you are addressing multiple comments, quote the relevant parts of each before your answer. Some people use the same comment to do multiple replies, others separate them into separate comments. Either way works. The latter approach helps for linking to a specific comment.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1 @@
+include LICENSE
--- a/103
+++ b/103
@ -1,18 +1,15 @@
-.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples benchmark
+.PHONY: deps_table_update modified_only_fixup extra_quality_checks quality style fixup fix-copies test test-examples docs

-# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
-export PYTHONPATH = src

 check_dirs := examples tests src utils

-exclude_folders :=  ""
-
 modified_only_fixup:
 	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
 	@if test -n "$(modified_py_files)"; then \
 		echo "Checking/fixing $(modified_py_files)"; \
-		ruff check $(modified_py_files) --fix --exclude $(exclude_folders); \
-		ruff format $(modified_py_files) --exclude $(exclude_folders);\
+		black $(modified_py_files); \
+		isort $(modified_py_files); \
+		flake8 $(modified_py_files); \
 	else \
 		echo "No library .py files were modified"; \
 	fi
@ -22,67 +19,40 @@ modified_only_fixup:
 deps_table_update:
 	@python setup.py deps_table_update

-deps_table_check_updated:
-	@md5sum src/transformers/dependency_versions_table.py > md5sum.saved
-	@python setup.py deps_table_update
-	@md5sum -c --quiet md5sum.saved || (printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1)
-	@rm md5sum.saved
+# Check that source code meets quality standards

-# autogenerating code
-
-autogenerate_code: deps_table_update
-
-# Check that the repo is in a good state
-
-repo-consistency:
+extra_quality_checks: deps_table_update
 	python utils/check_copies.py
-	python utils/check_modular_conversion.py
+	python utils/check_table.py
 	python utils/check_dummies.py
 	python utils/check_repo.py
-	python utils/check_inits.py
-	python utils/check_config_docstrings.py
-	python utils/check_config_attributes.py
-	python utils/check_doctest_list.py
-	python utils/update_metadata.py --check-only
-	python utils/check_docstrings.py
+	python utils/style_doc.py src/transformers docs/source --max_len 119

 # this target runs checks on all files
-
 quality:
-	@python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
-	ruff check $(check_dirs) setup.py conftest.py
-	ruff format --check $(check_dirs) setup.py conftest.py
-	python utils/sort_auto_mappings.py --check_only
-	python utils/check_doc_toc.py
-	python utils/check_docstrings.py --check_all
-
+	black --check $(check_dirs)
+	isort --check-only $(check_dirs)
+	flake8 $(check_dirs)
+	python utils/style_doc.py src/transformers docs/source --max_len 119 --check_only
+	${MAKE} extra_quality_checks

 # Format source code automatically and check if there are any problems left that need manual fixing

-extra_style_checks:
-	python utils/sort_auto_mappings.py
-	python utils/check_doc_toc.py --fix_and_overwrite
-
-# this target runs checks on all files and potentially modifies some of them
-
-style:
-	ruff check $(check_dirs) setup.py conftest.py --fix --exclude $(exclude_folders)
-	ruff format $(check_dirs) setup.py conftest.py --exclude $(exclude_folders)
-	${MAKE} autogenerate_code
-	${MAKE} extra_style_checks
+style: deps_table_update
+	black $(check_dirs)
+	isort $(check_dirs)
+	python utils/style_doc.py src/transformers docs/source --max_len 119

 # Super fast fix and check target that only works on relevant modified files since the branch was made

-fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
+fixup: modified_only_fixup extra_quality_checks

 # Make marked copies of snippets of codes conform to the original

 fix-copies:
 	python utils/check_copies.py --fix_and_overwrite
-	python utils/check_modular_conversion.py --fix_and_overwrite
+	python utils/check_table.py --fix_and_overwrite
 	python utils/check_dummies.py --fix_and_overwrite
-	python utils/check_doctest_list.py --fix_and_overwrite
-	python utils/check_docstrings.py --fix_and_overwrite

 # Run tests for the library

@ -92,36 +62,9 @@ test:
 # Run tests for examples

 test-examples:
-	python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
+	python -m pytest -n auto --dist=loadfile -s -v ./examples/

-# Run benchmark
+# Check that docs can build

-benchmark:
-	python3 benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=diff backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
-
-# Run tests for SageMaker DLC release
-
-test-sagemaker: # install sagemaker dependencies in advance with pip install .[sagemaker]
-	TEST_SAGEMAKER=True python -m pytest -n auto  -s -v ./tests/sagemaker
-
-
-# Release stuff
-
-pre-release:
-	python utils/release.py
-
-pre-patch:
-	python utils/release.py --patch
-
-post-release:
-	python utils/release.py --post_release
-
-post-patch:
-	python utils/release.py --post_release --patch
-
-build-release:
-	rm -rf dist
-	rm -rf build
-	python setup.py bdist_wheel
-	python setup.py sdist
-	python utils/check_build.py
+docs:
+	cd docs && make html SPHINXOPTS="-W -j 4"
--- a/README.md
+++ b/README.md
@ -15,298 +15,241 @@ limitations under the License.
 -->

 <p align="center">
-  <picture>
-    <source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/transformers-logo-dark.svg">
-    <source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/transformers-logo-light.svg">
-    <img alt="Hugging Face Transformers Library" src="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/transformers-logo-light.svg" width="352" height="59" style="max-width: 100%;">
-  </picture>
-  <br/>
-  <br/>
-</p>
-
+    <br>
+    <img src="https://raw.githubusercontent.com/huggingface/transformers/master/docs/source/imgs/transformers_logo_name.png" width="400"/>
+    <br>
+</p>
 <p align="center">
-    <a href="https://huggingface.com/models"><img alt="Checkpoints on Hub" src="https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen"></a>
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
-    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/master">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/master/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/transformers/index.html">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/transformers/index.html.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/master/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
 </p>

-<h4 align="center">
-    <p>
-        <b>English</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
-    </p>
-</h4>
-
 <h3 align="center">
-    <p>State-of-the-art pretrained models for inference and training</p>
+<p>State-of-the-art Natural Language Processing for PyTorch and TensorFlow 2.0</p>
 </h3>

-<h3 align="center">
-    <a href="https://hf.co/course"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/course_banner.png"></a>
-</h3>
+🤗 Transformers provides thousands of pretrained models to perform tasks on texts such as classification, information extraction, question answering, summarization, translation, text generation, etc in 100+ languages. Its aim is to make cutting-edge NLP easier to use for everyone.

-Transformers is a library of pretrained text, computer vision, audio, video, and multimodal models for inference and training. Use Transformers to fine-tune models on your data, build inference applications, and for generative AI use cases across multiple modalities.
+🤗 Transformers provides APIs to quickly download and use those pretrained models on a given text, fine-tune them on your own datasets then share them with the community on our [model hub](https://huggingface.co/models). At the same time, each python module defining an architecture can be used as a standalone and modified to enable quick research experiments.

-There are over 500K+ Transformers [model checkpoints](https://huggingface.co/models?library=transformers&sort=trending) on the [Hugging Face Hub](https://huggingface.com/models) you can use.
+🤗 Transformers is backed by the two most popular deep learning libraries, [PyTorch](https://pytorch.org/) and [TensorFlow](https://www.tensorflow.org/), with a seamless integration between them, allowing you to train your models with one then load it for inference with the other.

-Explore the [Hub](https://huggingface.com/) today to find a model and use Transformers to help you get started right away.
+## Online demos

-## Installation
+You can test most of our models directly on their pages from the [model hub](https://huggingface.co/models). We also offer [private model hosting, versioning, & an inference API](https://huggingface.co/pricing) to use those models.

-Transformers works with Python 3.9+ [PyTorch](https://pytorch.org/get-started/locally/) 2.1+, [TensorFlow](https://www.tensorflow.org/install/pip) 2.6+, and [Flax](https://flax.readthedocs.io/en/latest/) 0.4.1+.
+Here are a few examples:
+- [Masked word completion with BERT](https://huggingface.co/bert-base-uncased?text=Paris+is+the+%5BMASK%5D+of+France)
+- [Named Entity Recognition with Electra](https://huggingface.co/dbmdz/electra-large-discriminator-finetuned-conll03-english?text=My+name+is+Sarah+and+I+live+in+London+city)
+- [Text generation with GPT-2](https://huggingface.co/gpt2?text=A+long+time+ago%2C+)
+- [Natural Language Inference with RoBERTa](https://huggingface.co/roberta-large-mnli?text=The+dog+was+lost.+Nobody+lost+any+animal)
+- [Summarization with BART](https://huggingface.co/facebook/bart-large-cnn?text=The+tower+is+324+metres+%281%2C063+ft%29+tall%2C+about+the+same+height+as+an+81-storey+building%2C+and+the+tallest+structure+in+Paris.+Its+base+is+square%2C+measuring+125+metres+%28410+ft%29+on+each+side.+During+its+construction%2C+the+Eiffel+Tower+surpassed+the+Washington+Monument+to+become+the+tallest+man-made+structure+in+the+world%2C+a+title+it+held+for+41+years+until+the+Chrysler+Building+in+New+York+City+was+finished+in+1930.+It+was+the+first+structure+to+reach+a+height+of+300+metres.+Due+to+the+addition+of+a+broadcasting+aerial+at+the+top+of+the+tower+in+1957%2C+it+is+now+taller+than+the+Chrysler+Building+by+5.2+metres+%2817+ft%29.+Excluding+transmitters%2C+the+Eiffel+Tower+is+the+second+tallest+free-standing+structure+in+France+after+the+Millau+Viaduct)
+- [Question answering with DistilBERT](https://huggingface.co/distilbert-base-uncased-distilled-squad?text=Which+name+is+also+used+to+describe+the+Amazon+rainforest+in+English%3F&context=The+Amazon+rainforest+%28Portuguese%3A+Floresta+Amaz%C3%B4nica+or+Amaz%C3%B4nia%3B+Spanish%3A+Selva+Amaz%C3%B3nica%2C+Amazon%C3%ADa+or+usually+Amazonia%3B+French%3A+For%C3%AAt+amazonienne%3B+Dutch%3A+Amazoneregenwoud%29%2C+also+known+in+English+as+Amazonia+or+the+Amazon+Jungle%2C+is+a+moist+broadleaf+forest+that+covers+most+of+the+Amazon+basin+of+South+America.+This+basin+encompasses+7%2C000%2C000+square+kilometres+%282%2C700%2C000+sq+mi%29%2C+of+which+5%2C500%2C000+square+kilometres+%282%2C100%2C000+sq+mi%29+are+covered+by+the+rainforest.+This+region+includes+territory+belonging+to+nine+nations.+The+majority+of+the+forest+is+contained+within+Brazil%2C+with+60%25+of+the+rainforest%2C+followed+by+Peru+with+13%25%2C+Colombia+with+10%25%2C+and+with+minor+amounts+in+Venezuela%2C+Ecuador%2C+Bolivia%2C+Guyana%2C+Suriname+and+French+Guiana.+States+or+departments+in+four+nations+contain+%22Amazonas%22+in+their+names.+The+Amazon+represents+over+half+of+the+planet%27s+remaining+rainforests%2C+and+comprises+the+largest+and+most+biodiverse+tract+of+tropical+rainforest+in+the+world%2C+with+an+estimated+390+billion+individual+trees+divided+into+16%2C000+species)
+- [Translation with T5](https://huggingface.co/t5-base?text=My+name+is+Wolfgang+and+I+live+in+Berlin)

-Create and activate a virtual environment with [venv](https://docs.python.org/3/library/venv.html) or [uv](https://docs.astral.sh/uv/), a fast Rust-based Python package and project manager.
+**[Write With Transformer](https://transformer.huggingface.co)**, built by the Hugging Face team, is the official demo of this repo’s text generation capabilities.

-```py
-# venv
-python -m venv .my-env
-source .my-env/bin/activate
-# uv
-uv venv .my-env
-source .my-env/bin/activate
+## Quick tour
+
+To immediately use a model on a given text, we provide the `pipeline` API. Pipelines group together a pretrained model with the preprocessing that was used during that model training. Here is how to quickly use a pipeline to classify positive versus negative texts
+
+```python
+>>> from transformers import pipeline
+
+# Allocate a pipeline for sentiment-analysis
+>>> classifier = pipeline('sentiment-analysis')
+>>> classifier('We are very happy to include pipeline into the transformers repository.')
+[{'label': 'POSITIVE', 'score': 0.9978193640708923}]
 ```

-Install Transformers in your virtual environment.
+The second line of code downloads and caches the pretrained model used by the pipeline, the third line evaluates it on the given text. Here the answer is "positive" with a confidence of 99.8%.

-```py
-# pip
-pip install "transformers[torch]"
+Here is another example of a pipeline, this time one that extracts answers to questions from some context:
+
+``` python
+>>> from transformers import pipeline
+
+# Allocate a pipeline for question-answering
+>>> question_answerer = pipeline('question-answering')
+>>> question_answerer({
+...     'question': 'What is the name of the repository ?',
+...     'context': 'Pipeline have been included in the huggingface/transformers repository'
+... })
+{'score': 0.5135612454720828, 'start': 35, 'end': 59, 'answer': 'huggingface/transformers'}

-# uv
-uv pip install "transformers[torch]"
 ```

-Install Transformers from source if you want the latest changes in the library or are interested in contributing. However, the *latest* version may not be stable. Feel free to open an [issue](https://github.com/huggingface/transformers/issues) if you encounter an error.
+On top of the answer, the pretrained model used here returned its confidence score, along with the start position and its end position in the tokenized sentence. You can learn more about the tasks supported by the `pipeline` API in [this tutorial](https://huggingface.co/transformers/task_summary.html).

-```shell
-git clone https://github.com/huggingface/transformers.git
-cd transformers
+To download and use any of the pretrained models on your given task, you just need to use these three lines of code (PyTorch version):
+```python
+>>> from transformers import AutoTokenizer, AutoModel

-# pip
-pip install .[torch]
+>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+>>> model = AutoModel.from_pretrained("bert-base-uncased")

-# uv
-uv pip install .[torch]
+>>> inputs = tokenizer("Hello world!", return_tensors="pt")
+>>> outputs = model(**inputs)
+```
+or for TensorFlow:
+```python
+>>> from transformers import AutoTokenizer, TFAutoModel
+
+>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+>>> model = TFAutoModel.from_pretrained("bert-base-uncased")
+
+>>> inputs = tokenizer("Hello world!", return_tensors="tf")
+>>> outputs = model(**inputs)
 ```

-## Quickstart
+The tokenizer is responsible for all the preprocessing the pretrained model expects, and can be called directly on one (or list) of texts (as we can see on the fourth line of both code examples). It will output a dictionary you can directly pass to your model (which is done on the fifth line).

-Get started with Transformers right away with the [Pipeline](https://huggingface.co/docs/transformers/pipeline_tutorial) API. The `Pipeline` is a high-level inference class that supports text, audio, vision, and multimodal tasks. It handles preprocessing the input and returns the appropriate output.
+The model itself is a regular [PyTorch `nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) or a [TensorFlow `tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) (depending on your backend) which you can use normally. For instance, [this tutorial](https://huggingface.co/transformers/training.html) explains how to integrate such a model in a classic PyTorch or TensorFlow training loop, or how to use our `Trainer` API to quickly fine-tune it on a new dataset.

-Instantiate a pipeline and specify model to use for text generation. The model is downloaded and cached so you can easily reuse it again. Finally, pass some text to prompt the model.
-
-```py
-from transformers import pipeline
-
-pipeline = pipeline(task="text-generation", model="Qwen/Qwen2.5-1.5B")
-pipeline("the secret to baking a really good cake is ")
-[{'generated_text': 'the secret to baking a really good cake is 1) to use the right ingredients and 2) to follow the recipe exactly. the recipe for the cake is as follows: 1 cup of sugar, 1 cup of flour, 1 cup of milk, 1 cup of butter, 1 cup of eggs, 1 cup of chocolate chips. if you want to make 2 cakes, how much sugar do you need? To make 2 cakes, you will need 2 cups of sugar.'}]
-```
-
-To chat with a model, the usage pattern is the same. The only difference is you need to construct a chat history (the input to `Pipeline`) between you and the system.
-
-> [!TIP]
-> You can also chat with a model directly from the command line.
-> ```shell
-> transformers chat Qwen/Qwen2.5-0.5B-Instruct
-> ```
-
-```py
-import torch
-from transformers import pipeline
-
-chat = [
-    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
-    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
-]
-
-pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto")
-response = pipeline(chat, max_new_tokens=512)
-print(response[0]["generated_text"][-1]["content"])
-```
-
-Expand the examples below to see how `Pipeline` works for different modalities and tasks.
-
-<details>
-<summary>Automatic speech recognition</summary>
-
-```py
-from transformers import pipeline
-
-pipeline = pipeline(task="automatic-speech-recognition", model="openai/whisper-large-v3")
-pipeline("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
-{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}
-```
-
-</details>
-
-<details>
-<summary>Image classification</summary>
-
-<h3 align="center">
-    <a><img src="https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png"></a>
-</h3>
-
-```py
-from transformers import pipeline
-
-pipeline = pipeline(task="image-classification", model="facebook/dinov2-small-imagenet1k-1-layer")
-pipeline("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
-[{'label': 'macaw', 'score': 0.997848391532898},
- {'label': 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
-  'score': 0.0016551691805943847},
- {'label': 'lorikeet', 'score': 0.00018523589824326336},
- {'label': 'African grey, African gray, Psittacus erithacus',
-  'score': 7.85409429227002e-05},
- {'label': 'quail', 'score': 5.502637941390276e-05}]
-```
-
-</details>
-
-<details>
-<summary>Visual question answering</summary>
-
-
-<h3 align="center">
-    <a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg"></a>
-</h3>
-
-```py
-from transformers import pipeline
-
-pipeline = pipeline(task="visual-question-answering", model="Salesforce/blip-vqa-base")
-pipeline(
-    image="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg",
-    question="What is in the image?",
-)
-[{'answer': 'statue of liberty'}]
-```
-
-</details>
-
-## Why should I use Transformers?
+## Why should I use transformers?

 1. Easy-to-use state-of-the-art models:
-    - High performance on natural language understanding & generation, computer vision, audio, video, and multimodal tasks.
-    - Low barrier to entry for researchers, engineers, and developers.
+    - High performance on NLU and NLG tasks.
+    - Low barrier to entry for educators and practitioners.
    - Few user-facing abstractions with just three classes to learn.
    - A unified API for using all our pretrained models.

 1. Lower compute costs, smaller carbon footprint:
-    - Share trained models instead of training from scratch.
-    - Reduce compute time and production costs.
-    - Dozens of model architectures with 1M+ pretrained checkpoints across all modalities.
+    - Researchers can share trained models instead of always retraining.
+    - Practitioners can reduce compute time and production costs.
+    - Dozens of architectures with over 2,000 pretrained models, some in more than 100 languages.

-1. Choose the right framework for every part of a models lifetime:
+1. Choose the right framework for every part of a model's lifetime:
    - Train state-of-the-art models in 3 lines of code.
-    - Move a single model between PyTorch/JAX/TF2.0 frameworks at will.
-    - Pick the right framework for training, evaluation, and production.
+    - Move a single model between TF2.0/PyTorch frameworks at will.
+    - Seamlessly pick the right framework for training, evaluation, production.

 1. Easily customize a model or an example to your needs:
-    - We provide examples for each architecture to reproduce the results published by its original authors.
-    - Model internals are exposed as consistently as possible.
+    - Examples for each architecture to reproduce the results by the official authors of said architecture.
+    - Expose the models' internals as consistently as possible.
    - Model files can be used independently of the library for quick experiments.

-<a target="_blank" href="https://huggingface.co/enterprise">
-    <img alt="Hugging Face Enterprise Hub" src="https://github.com/user-attachments/assets/247fb16d-d251-4583-96c4-d3d76dda4925">
-</a><br>
+## Why shouldn't I use transformers?

-## Why shouldn't I use Transformers?
+- This library is not a modular toolbox of building blocks for neural nets. The code in the model files is not refactored with additional abstractions on purpose, so that researchers can quickly iterate on each of the models without diving in additional abstractions/files.
+- The training API is not intended to work on any model but is optimized to work with the models provided by the library. For generic machine learning loops, you should use another library.
+- While we strive to present as many use cases as possible, the scripts in our [examples folder](https://github.com/huggingface/transformers/tree/master/examples) are just that: examples. It is expected that they won't work out-of-the-box on your specific problem and that you will be required to change a few lines of code to adapt them to your needs.

- This library is not a modular toolbox of building blocks for neural nets. The code in the model files is not refactored with additional abstractions on purpose, so that researchers can quickly iterate on each of the models without diving into additional abstractions/files.
- The training API is optimized to work with PyTorch models provided by Transformers. For generic machine learning loops, you should use another library like [Accelerate](https://huggingface.co/docs/accelerate).
- The [example scripts]((https://github.com/huggingface/transformers/tree/main/examples)) are only *examples*. They may not necessarily work out-of-the-box on your specific use case and you'll need to adapt the code for it to work.
+## Installation

-## 100 projects using Transformers
+### With pip

-Transformers is more than a toolkit to use pretrained models, it's a community of projects built around it and the
-Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone
-else to build their dream projects.
+This repository is tested on Python 3.6+, PyTorch 1.0.0+ (PyTorch 1.3.1+ for [examples](https://github.com/huggingface/transformers/tree/master/examples)) and TensorFlow 2.0.

-In order to celebrate Transformers 100,000 stars, we wanted to put the spotlight on the
-community with the [awesome-transformers](./awesome-transformers.md) page which lists 100
-incredible projects built with Transformers.
+You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).

-If you own or use a project that you believe should be part of the list, please open a PR to add it!
+First, create a virtual environment with the version of Python you're going to use and activate it.

-## Example models
+Then, you will need to install at least one of TensorFlow 2.0, PyTorch or Flax.
+Please refer to the [TensorFlow installation page](https://www.tensorflow.org/install/pip#tensorflow-2.0-rc-is-available), [PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax installation page](https://github.com/google/flax#quick-install) for the specific install command for your platform.

-You can test most of our models directly on their [Hub model pages](https://huggingface.co/models).
+When TensorFlow 2.0 and/or PyTorch has been installed, 🤗 Transformers can be installed using pip as follows:

-Expand each modality below to see a few example models for various use cases.
+```bash
+pip install transformers
+```

-<details>
-<summary>Audio</summary>
+If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/transformers/installation.html#installing-from-source).

- Audio classification with [Whisper](https://huggingface.co/openai/whisper-large-v3-turbo)
- Automatic speech recognition with [Moonshine](https://huggingface.co/UsefulSensors/moonshine)
- Keyword spotting with [Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks)
- Speech to speech generation with [Moshi](https://huggingface.co/kyutai/moshiko-pytorch-bf16)
- Text to audio with [MusicGen](https://huggingface.co/facebook/musicgen-large)
- Text to speech with [Bark](https://huggingface.co/suno/bark)
+### With conda

-</details>
+Since Transformers version v4.0.0, we now have a conda channel: `huggingface`.

-<details>
-<summary>Computer vision</summary>
+🤗 Transformers can be installed using conda as follows:

- Automatic mask generation with [SAM](https://huggingface.co/facebook/sam-vit-base)
- Depth estimation with [DepthPro](https://huggingface.co/apple/DepthPro-hf)
- Image classification with [DINO v2](https://huggingface.co/facebook/dinov2-base)
- Keypoint detection with [SuperGlue](https://huggingface.co/magic-leap-community/superglue_outdoor)
- Keypoint matching with [SuperGlue](https://huggingface.co/magic-leap-community/superglue)
- Object detection with [RT-DETRv2](https://huggingface.co/PekingU/rtdetr_v2_r50vd)
- Pose Estimation with [VitPose](https://huggingface.co/usyd-community/vitpose-base-simple)
- Universal segmentation with [OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_swin_large)
- Video classification with [VideoMAE](https://huggingface.co/MCG-NJU/videomae-large)
+```shell script
+conda install -c huggingface transformers
+```

-</details>
+Follow the installation pages of TensorFlow, PyTorch or Flax to see how to install them with conda.

-<details>
-<summary>Multimodal</summary>
+## Models architectures

- Audio or text to text with [Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B)
- Document question answering with [LayoutLMv3](https://huggingface.co/microsoft/layoutlmv3-base)
- Image or text to text with [Qwen-VL](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
- Image captioning [BLIP-2](https://huggingface.co/Salesforce/blip2-opt-2.7b)
- OCR-based document understanding with [GOT-OCR2](https://huggingface.co/stepfun-ai/GOT-OCR-2.0-hf)
- Table question answering with [TAPAS](https://huggingface.co/google/tapas-base)
- Unified multimodal understanding and generation with [Emu3](https://huggingface.co/BAAI/Emu3-Gen)
- Vision to text with [Llava-OneVision](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)
- Visual question answering with [Llava](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
- Visual referring expression segmentation with [Kosmos-2](https://huggingface.co/microsoft/kosmos-2-patch14-224)
+**[All the model checkpoints](https://huggingface.co/models)** provided by 🤗 Transformers are seamlessly integrated from the huggingface.co [model hub](https://huggingface.co) where they are uploaded directly by [users](https://huggingface.co/users) and [organizations](https://huggingface.co/organizations).

-</details>
+Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen)

-<details>
-<summary>NLP</summary>
+🤗 Transformers currently provides the following architectures (see [here](https://huggingface.co/transformers/model_summary.html) for a high-level summary of each of them):

- Masked word completion with [ModernBERT](https://huggingface.co/answerdotai/ModernBERT-base)
- Named entity recognition with [Gemma](https://huggingface.co/google/gemma-2-2b)
- Question answering with [Mixtral](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)
- Summarization with [BART](https://huggingface.co/facebook/bart-large-cnn)
- Translation with [T5](https://huggingface.co/google-t5/t5-base)
- Text generation with [Llama](https://huggingface.co/meta-llama/Llama-3.2-1B)
- Text classification with [Qwen](https://huggingface.co/Qwen/Qwen2.5-0.5B)
+1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
+1. **[BART](https://huggingface.co/transformers/model_doc/bart.html)** (from Facebook) released with the paper [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://arxiv.org/pdf/1910.13461.pdf) by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer.
+1. **[BARThez](https://huggingface.co/transformers/model_doc/barthez.html)** (from École polytechnique) released with the paper [BARThez: a Skilled Pretrained French Sequence-to-Sequence Model](https://arxiv.org/abs/2010.12321) by Moussa Kamal Eddine, Antoine J.-P. Tixier, Michalis Vazirgiannis.
+1. **[BERT](https://huggingface.co/transformers/model_doc/bert.html)** (from Google) released with the paper [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova.
+1. **[BERT For Sequence Generation](https://huggingface.co/transformers/model_doc/bertgeneration.html)** (from Google) released with the paper [Leveraging Pre-trained Checkpoints for Sequence Generation Tasks](https://arxiv.org/abs/1907.12461) by Sascha Rothe, Shashi Narayan, Aliaksei Severyn.
+1. **[Blenderbot](https://huggingface.co/transformers/model_doc/blenderbot.html)** (from Facebook) released with the paper [Recipes for building an open-domain chatbot](https://arxiv.org/abs/2004.13637) by Stephen Roller, Emily Dinan, Naman Goyal, Da Ju, Mary Williamson, Yinhan Liu, Jing Xu, Myle Ott, Kurt Shuster, Eric M. Smith, Y-Lan Boureau, Jason Weston.
+1. **[BlenderbotSmall](https://huggingface.co/transformers/model_doc/blenderbot_small.html)** (from Facebook) released with the paper [Recipes for building an open-domain chatbot](https://arxiv.org/abs/2004.13637) by Stephen Roller, Emily Dinan, Naman Goyal, Da Ju, Mary Williamson, Yinhan Liu, Jing Xu, Myle Ott, Kurt Shuster, Eric M. Smith, Y-Lan Boureau, Jason Weston.
+1. **[BORT](https://huggingface.co/transformers/model_doc/bort.html)** (from Alexa) released with the paper [Optimal Subarchitecture Extraction For BERT](https://arxiv.org/abs/2010.10499) by Adrian de Wynter and Daniel J. Perry.
+1. **[CamemBERT](https://huggingface.co/transformers/model_doc/camembert.html)** (from Inria/Facebook/Sorbonne) released with the paper [CamemBERT: a Tasty French Language Model](https://arxiv.org/abs/1911.03894) by Louis Martin*, Benjamin Muller*, Pedro Javier Ortiz Suárez*, Yoann Dupont, Laurent Romary, Éric Villemonte de la Clergerie, Djamé Seddah and Benoît Sagot.
+1. **[ConvBERT](https://huggingface.co/transformers/model_doc/convbert.html)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
+1. **[CTRL](https://huggingface.co/transformers/model_doc/ctrl.html)** (from Salesforce) released with the paper [CTRL: A Conditional Transformer Language Model for Controllable Generation](https://arxiv.org/abs/1909.05858) by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher.
+1. **[DeBERTa](https://huggingface.co/transformers/model_doc/deberta.html)** (from Microsoft Research) released with the paper [DeBERTa: Decoding-enhanced BERT with Disentangled Attention](https://arxiv.org/abs/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
+1. **[DialoGPT](https://huggingface.co/transformers/model_doc/dialogpt.html)** (from Microsoft Research) released with the paper [DialoGPT: Large-Scale Generative Pre-training for Conversational Response Generation](https://arxiv.org/abs/1911.00536) by Yizhe Zhang, Siqi Sun, Michel Galley, Yen-Chun Chen, Chris Brockett, Xiang Gao, Jianfeng Gao, Jingjing Liu, Bill Dolan.
+1. **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/master/examples/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/master/examples/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/master/examples/distillation) and a German version of DistilBERT.
+1. **[DPR](https://huggingface.co/transformers/model_doc/dpr.html)** (from Facebook) released with the paper [Dense Passage Retrieval
+for Open-Domain Question Answering](https://arxiv.org/abs/2004.04906) by Vladimir Karpukhin, Barlas Oğuz, Sewon
+Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.
+1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)** (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://arxiv.org/abs/2003.10555) by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning.
+1. **[FlauBERT](https://huggingface.co/transformers/model_doc/flaubert.html)** (from CNRS) released with the paper [FlauBERT: Unsupervised Language Model Pre-training for French](https://arxiv.org/abs/1912.05372) by Hang Le, Loïc Vial, Jibril Frej, Vincent Segonne, Maximin Coavoux, Benjamin Lecouteux, Alexandre Allauzen, Benoît Crabbé, Laurent Besacier, Didier Schwab.
+1. **[Funnel Transformer](https://huggingface.co/transformers/model_doc/funnel.html)** (from CMU/Google Brain) released with the paper [Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing](https://arxiv.org/abs/2006.03236) by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le.
+1. **[GPT](https://huggingface.co/transformers/model_doc/gpt.html)** (from OpenAI) released with the paper [Improving Language Understanding by Generative Pre-Training](https://blog.openai.com/language-unsupervised/) by Alec Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever.
+1. **[GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html)** (from OpenAI) released with the paper [Language Models are Unsupervised Multitask Learners](https://blog.openai.com/better-language-models/) by Alec Radford*, Jeffrey Wu*, Rewon Child, David Luan, Dario Amodei** and Ilya Sutskever**.
+1. **[LayoutLM](https://huggingface.co/transformers/model_doc/layoutlm.html)** (from Microsoft Research Asia) released with the paper [LayoutLM: Pre-training of Text and Layout for Document Image Understanding](https://arxiv.org/abs/1912.13318) by Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou.
+1. **[LED](https://huggingface.co/transformers/model_doc/led.html)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
+1. **[Longformer](https://huggingface.co/transformers/model_doc/longformer.html)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
+1. **[LXMERT](https://huggingface.co/transformers/model_doc/lxmert.html)** (from UNC Chapel Hill) released with the paper [LXMERT: Learning Cross-Modality Encoder Representations from Transformers for Open-Domain Question Answering](https://arxiv.org/abs/1908.07490) by Hao Tan and Mohit Bansal.
+1. **[MarianMT](https://huggingface.co/transformers/model_doc/marian.html)** Machine translation models trained using [OPUS](http://opus.nlpl.eu/) data by Jörg Tiedemann. The [Marian Framework](https://marian-nmt.github.io/) is being developed by the Microsoft Translator Team.
+1. **[MBart](https://huggingface.co/transformers/model_doc/mbart.html)** (from Facebook) released with the paper [Multilingual Denoising Pre-training for Neural Machine Translation](https://arxiv.org/abs/2001.08210) by Yinhan Liu, Jiatao Gu, Naman Goyal, Xian Li, Sergey Edunov, Marjan Ghazvininejad, Mike Lewis, Luke Zettlemoyer.
+1. **[MPNet](https://huggingface.co/transformers/model_doc/mpnet.html)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
+1. **[MT5](https://huggingface.co/transformers/model_doc/mt5.html)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
+1. **[Pegasus](https://huggingface.co/transformers/model_doc/pegasus.html)** (from Google) released with the paper [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://arxiv.org/abs/1912.08777) by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu.
+1. **[ProphetNet](https://huggingface.co/transformers/model_doc/prophetnet.html)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
+1. **[Reformer](https://huggingface.co/transformers/model_doc/reformer.html)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
+1. **[RoBERTa](https://huggingface.co/transformers/model_doc/roberta.html)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://arxiv.org/abs/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.
+1. **[SqueezeBert](https://huggingface.co/transformers/model_doc/squeezebert.html)** released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
+1. **[T5](https://huggingface.co/transformers/model_doc/t5.html)** (from Google AI) released with the paper [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/abs/1910.10683) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu.
+1. **[TAPAS](https://huggingface.co/transformers/model_doc/tapas.html)** (from Google AI) released with the paper [TAPAS: Weakly Supervised Table Parsing via Pre-training](https://arxiv.org/abs/2004.02349) by Jonathan Herzig, Paweł Krzysztof Nowak, Thomas Müller, Francesco Piccinno and Julian Martin Eisenschlos.
+1. **[Transformer-XL](https://huggingface.co/transformers/model_doc/transformerxl.html)** (from Google/CMU) released with the paper [Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context](https://arxiv.org/abs/1901.02860) by Zihang Dai*, Zhilin Yang*, Yiming Yang, Jaime Carbonell, Quoc V. Le, Ruslan Salakhutdinov.
+1. **[Wav2Vec2](https://huggingface.co/transformers/model_doc/wav2vec2.html)** (from Facebook AI) released with the paper [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli.
+1. **[XLM](https://huggingface.co/transformers/model_doc/xlm.html)** (from Facebook) released together with the paper [Cross-lingual Language Model Pretraining](https://arxiv.org/abs/1901.07291) by Guillaume Lample and Alexis Conneau.
+1. **[XLM-ProphetNet](https://huggingface.co/transformers/model_doc/xlmprophetnet.html)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
+1. **[XLM-RoBERTa](https://huggingface.co/transformers/model_doc/xlmroberta.html)** (from Facebook AI), released together with the paper [Unsupervised Cross-lingual Representation Learning at Scale](https://arxiv.org/abs/1911.02116) by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov.
+1. **[XLNet](https://huggingface.co/transformers/model_doc/xlnet.html)** (from Google/CMU) released with the paper [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) by Zhilin Yang*, Zihang Dai*, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le.
+1. Want to contribute a new model? We have added a **detailed guide and templates** to guide you in the process of adding a new model. You can find them in the [`templates`](./templates) folder of the repository. Be sure to check the [contributing guidelines](./CONTRIBUTING.md) and contact the maintainers or open an issue to collect feedback before starting your PR.

-</details>
+To check if each model has an implementation in PyTorch/TensorFlow/Flax or has an associated tokenizer backed by the 🤗 Tokenizers library, refer to [this table](https://huggingface.co/transformers/index.html#bigtable).
+
+These implementations have been tested on several datasets (see the example scripts) and should match the performance of the original implementations. You can find more details on performance in the Examples section of the [documentation](https://huggingface.co/transformers/examples.html).
+
+
+## Learn more
+
+| Section | Description |
+|-|-|
+| [Documentation](https://huggingface.co/transformers/) | Full API documentation and tutorials |
+| [Task summary](https://huggingface.co/transformers/task_summary.html) | Tasks supported by 🤗 Transformers |
+| [Preprocessing tutorial](https://huggingface.co/transformers/preprocessing.html) | Using the `Tokenizer` class to prepare data for the models |
+| [Training and fine-tuning](https://huggingface.co/transformers/training.html) | Using the models provided by 🤗 Transformers in a PyTorch/TensorFlow training loop and the `Trainer` API |
+| [Quick tour: Fine-tuning/usage scripts](https://github.com/huggingface/transformers/tree/master/examples) | Example scripts for fine-tuning models on a wide range of tasks |
+| [Model sharing and uploading](https://huggingface.co/transformers/model_sharing.html) | Upload and share your fine-tuned models with the community |
+| [Migration](https://huggingface.co/transformers/migration.html) | Migrate to 🤗 Transformers from `pytorch-transformers` or `pytorch-pretrained-bert` |

 ## Citation

--- a/SECURITY.md
+++ b/SECURITY.md
@ -1,32 +0,0 @@
-# Security Policy
-
-## Hugging Face Hub, remote artefacts, and remote code
-
-Transformers is open-source software that is tightly coupled to the Hugging Face Hub. While you have the ability to use it
-offline with pre-downloaded model weights, it provides a very simple way to download, use, and manage models locally.
-
-When downloading artefacts that have been uploaded by others on any platform, you expose yourself to risks. Please
-read below for the security recommendations in order to keep your runtime and local environment safe.
-
-### Remote artefacts
-
-Models uploaded on the Hugging Face Hub come in different formats. We heavily recommend uploading and downloading
-models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized
-by the transformers library), as developed specifically to prevent arbitrary code execution on your system.
-
-To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
-
-### Remote code
-
-#### Modeling
-
-Transformers supports many model architectures, but is also the bridge between your Python runtime and models that
-are stored in model repositories on the Hugging Face Hub.
-
-These models require the `trust_remote_code=True` parameter to be set when using them; please **always** verify
-the content of the modeling files when using this argument. We recommend setting a revision in order to ensure you
-protect yourself from updates on the repository.
-
-## Reporting a Vulnerability
-
-Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software.
--- a/awesome-transformers.md
+++ b/awesome-transformers.md
@ -1,609 +0,0 @@
-# Awesome projects built with Transformers
-
-This page lists awesome projects built on top of Transformers. Transformers is more than a toolkit to use pretrained
-models: it's a community of projects built around it and the Hugging Face Hub. We want Transformers to enable
-developers, researchers, students, professors, engineers, and anyone else to build their dream projects.
-
-In this list, we showcase incredibly impactful and novel projects that have pushed the field forward. We celebrate
-100 of these projects as we reach the milestone of 100k stars as a community; but we're very open to pull requests
-adding other projects to the list. If you believe a project should be here and it's not, then please, open a PR 
-to add it.
-
-## [gpt4all](https://github.com/nomic-ai/gpt4all)
-
-[gpt4all](https://github.com/nomic-ai/gpt4all) is an ecosystem of open-source chatbots trained on massive collections of clean assistant data including code, stories and dialogue. It offers open-source, large language models such as LLaMA and GPT-J trained in an assistant-style.
-
-Keywords: Open-source, LLaMa, GPT-J, instruction, assistant
-
-## [recommenders](https://github.com/recommenders-team/recommenders)
-
-This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. It goes over several aspects required to build efficient recommendation systems: data preparation, modeling, evaluation, model selection & optimization, as well as operationalization
-
-Keywords: Recommender systems, AzureML
-
-## [IOPaint](https://github.com/Sanster/IOPaint)
-
-Image inpainting tool powered by Stable Diffusion. Remove any unwanted object, defect, people from your pictures or erase and replace anything on your pictures.
-
-Keywords: inpainting, SD, Stable Diffusion
-
-## [flair](https://github.com/flairNLP/flair)
-
-FLAIR is a powerful PyTorch NLP framework, covering several important tasks: NER, sentiment-analysis, part-of-speech tagging, text and document embeddings, among other things.
-
-Keywords: NLP, text embedding, document embedding, biomedical, NER, PoS, sentiment-analysis
-
-## [mindsdb](https://github.com/mindsdb/mindsdb)
-
-MindsDB is a low-code ML platform, which automates and integrates several ML frameworks into the data stack as "AI Tables" to streamline the integration of AI into applications, making it accessible to developers of all skill levels.
-
-Keywords: Database, low-code, AI table
-
-## [langchain](https://github.com/langchain-ai/langchain)
-
-[langchain](https://github.com/langchain-ai/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.
-
-Keywords: LLMs, Large Language Models, Agents, Chains
-
-## [LlamaIndex](https://github.com/run-llama/llama_index)
-
-[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLMs with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
-
-Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation 
-
-## [ParlAI](https://github.com/facebookresearch/ParlAI)
-
-[ParlAI](https://github.com/facebookresearch/ParlAI) is a python framework for sharing, training and testing dialogue models, from open-domain chitchat, to task-oriented dialogue, to visual question answering. It provides more than 100 datasets under the same API, a large zoo of pretrained models, a set of agents, and has several integrations.
-
-Keywords: Dialogue, Chatbots, VQA, Datasets, Agents
-
-## [sentence-transformers](https://github.com/UKPLab/sentence-transformers)
-
-This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text is close and can efficiently be found using cosine similarity.
-
-Keywords: Dense vector representations, Text embeddings, Sentence embeddings
-
-## [ludwig](https://github.com/ludwig-ai/ludwig)
-
-Ludwig is a declarative machine learning framework that makes it easy to define machine learning pipelines using a simple and flexible data-driven configuration system. Ludwig is targeted at a wide variety of AI tasks. It provides a data-driven configuration system, training, prediction, and evaluation scripts, as well as a programmatic API.
-
-Keywords: Declarative, Data-driven, ML Framework
-
-## [InvokeAI](https://github.com/invoke-ai/InvokeAI)
-
-[InvokeAI](https://github.com/invoke-ai/InvokeAI) is an engine for Stable Diffusion models, aimed at professionals, artists, and enthusiasts. It leverages the latest AI-driven technologies through CLI as well as a WebUI.
-
-Keywords: Stable-Diffusion, WebUI, CLI
-
-## [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP)
-
-[PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP) is an easy-to-use and powerful NLP library particularly targeted at the Chinese languages. It has support for multiple pre-trained model zoos, and supports a wide-range of NLP tasks from research to industrial applications.
-
-Keywords: NLP, Chinese, Research, Industry
-
-## [stanza](https://github.com/stanfordnlp/stanza)
-
-The Stanford NLP Group's official Python NLP library. It contains support for running various accurate natural language processing tools on 60+ languages and for accessing the Java Stanford CoreNLP software from Python.
-
-Keywords: NLP, Multilingual, CoreNLP
-
-## [DeepPavlov](https://github.com/deeppavlov/DeepPavlov)
-
-[DeepPavlov](https://github.com/deeppavlov/DeepPavlov) is an open-source conversational AI library. It is designed for the development of production ready chat-bots and complex conversational systems, as well as research in the area of NLP and, particularly, of dialog systems.
-
-Keywords: Conversational, Chatbot, Dialog
-
-## [alpaca-lora](https://github.com/tloen/alpaca-lora)
-
-Alpaca-lora contains code for reproducing the Stanford Alpaca results using low-rank adaptation (LoRA). The repository provides training (fine-tuning) as well as generation scripts.
-
-Keywords: LoRA, Parameter-efficient fine-tuning
-
-## [imagen-pytorch](https://github.com/lucidrains/imagen-pytorch)
-
-An open-source Implementation of Imagen, Google's closed-source Text-to-Image Neural Network that beats DALL-E2. As of release, it is the new SOTA for text-to-image synthesis.
-
-Keywords: Imagen, Text-to-image
-
-## [adapters](https://github.com/adapter-hub/adapters)
-
-[adapters](https://github.com/adapter-hub/adapters) is an extension of HuggingFace's Transformers library, integrating adapters into state-of-the-art language models by incorporating AdapterHub, a central repository for pre-trained adapter modules. It is a drop-in replacement for transformers, which is regularly updated to stay up-to-date with the developments of transformers.
-
-Keywords: Adapters, LoRA, Parameter-efficient fine-tuning, Hub
-
-## [NeMo](https://github.com/NVIDIA/NeMo)
-
-NVIDIA [NeMo](https://github.com/NVIDIA/NeMo) is a conversational AI toolkit built for researchers working on automatic speech recognition (ASR), text-to-speech synthesis (TTS), large language models (LLMs), and natural language processing (NLP). The primary objective of [NeMo](https://github.com/NVIDIA/NeMo) is to help researchers from industry and academia to reuse prior work (code and pretrained models) and make it easier to create new [conversational AI models](https://developer.nvidia.com/conversational-ai#started).
-
-Keywords: Conversational, ASR, TTS, LLMs, NLP
-
-## [Runhouse](https://github.com/run-house/runhouse)
-
-[Runhouse](https://github.com/run-house/runhouse) allows you to send code and data to any of your compute or data infra, all in Python, and continue to interact with them normally from your existing code and environment. Runhouse developers mention:
-
-> Think of it as an expansion pack to your Python interpreter that lets it take detours to remote machines or manipulate remote data.
-
-Keywords: MLOps, Infrastructure, Data storage, Modeling
-
-## [MONAI](https://github.com/Project-MONAI/MONAI)
-
-[MONAI](https://github.com/Project-MONAI/MONAI) is a PyTorch-based, open-source framework for deep learning in healthcare imaging, part of PyTorch Ecosystem. Its ambitions are:
- developing a community of academic, industrial and clinical researchers collaborating on a common foundation;
- creating state-of-the-art, end-to-end training workflows for healthcare imaging;
- providing researchers with the optimized and standardized way to create and evaluate deep learning models.
-
-Keywords: Healthcare imaging, Training, Evaluation
-
-## [simpletransformers](https://github.com/ThilinaRajapakse/simpletransformers)
-
-Simple Transformers lets you quickly train and evaluate Transformer models. Only 3 lines of code are needed to initialize, train, and evaluate a model. It supports a wide variety of NLP tasks.
-
-Keywords: Framework, simplicity, NLP
-
-## [JARVIS](https://github.com/microsoft/JARVIS)
-
-[JARVIS](https://github.com/microsoft/JARVIS) is a system attempting to merge LLMs such as GPT-4 with the rest of the open-source ML community: leveraging up to 60 downstream models in order to perform tasks identified by the LLM.
-
-Keywords: LLM, Agents, HF Hub
-
-## [transformers.js](https://github.com/huggingface/transformers.js/)
-
-[transformers.js](https://github.com/huggingface/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.
-
-Keywords: Transformers, JavaScript, browser
-
-## [bumblebee](https://github.com/elixir-nx/bumblebee)
-
-Bumblebee provides pre-trained Neural Network models on top of Axon, a neural networks library for the Elixir language. It includes integration with 🤗 Models, allowing anyone to download and perform Machine Learning tasks with few lines of code.
-
-Keywords: Elixir, Axon
-
-## [argilla](https://github.com/argilla-io/argilla)
-
-Argilla is an open-source platform providing advanced NLP labeling, monitoring, and workspaces. It is compatible with many open source ecosystems such as Hugging Face, Stanza, FLAIR, and others.
-
-Keywords: NLP, Labeling, Monitoring, Workspaces
-
-## [haystack](https://github.com/deepset-ai/haystack)
-
-Haystack is an open source NLP framework to interact with your data using Transformer models and LLMs. It offers production-ready tools to quickly build complex decision making, question answering, semantic search, text generation applications, and more.
-
-Keywords: NLP, Framework, LLM
-
-## [spaCy](https://github.com/explosion/spaCy)
-
-[spaCy](https://github.com/explosion/spaCy) is a library for advanced Natural Language Processing in Python and Cython. It's built on the very latest research, and was designed from day one to be used in real products. It offers support for transformers models through its third party package, spacy-transformers.
-
-Keywords: NLP, Framework
-
-## [speechbrain](https://github.com/speechbrain/speechbrain)
-
-SpeechBrain is an open-source and all-in-one conversational AI toolkit based on PyTorch.
-The goal is to create a single, flexible, and user-friendly toolkit that can be used to easily develop state-of-the-art speech technologies, including systems for speech recognition, speaker recognition, speech enhancement, speech separation, language identification, multi-microphone signal processing, and many others.
-
-Keywords: Conversational, Speech
-
-## [skorch](https://github.com/skorch-dev/skorch)
-
-Skorch is a scikit-learn compatible neural network library that wraps PyTorch. It has support for models within transformers, and tokenizers from tokenizers.
-
-Keywords: Scikit-Learn, PyTorch
-
-## [bertviz](https://github.com/jessevig/bertviz)
-
-BertViz is an interactive tool for visualizing attention in Transformer language models such as BERT, GPT2, or T5. It can be run inside a Jupyter or Colab notebook through a simple Python API that supports most Huggingface models.
-
-Keywords: Visualization, Transformers
-
-## [mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax)
-
-[mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax) is a haiku library using the xmap/pjit operators in JAX for model parallelism of transformers. This library is designed for scalability up to approximately 40B parameters on TPUv3s. It was the library used to train the GPT-J model.
-
-Keywords: Haiku, Model parallelism, LLM, TPU
-
-## [deepchem](https://github.com/deepchem/deepchem)
-
-DeepChem aims to provide a high quality open-source toolchain that democratizes the use of deep-learning in drug discovery, materials science, quantum chemistry, and biology.
-
-Keywords: Drug discovery, Materials Science, Quantum Chemistry, Biology
-
-## [OpenNRE](https://github.com/thunlp/OpenNRE)
-
-An Open-Source Package for Neural Relation Extraction (NRE). It is targeted at a wide range of users, from newcomers to relation extraction, to developers, researchers, or students.
-
-Keywords: Neural Relation Extraction, Framework
-
-## [pycorrector](https://github.com/shibing624/pycorrector)
-
-PyCorrector is a Chinese Text Error Correction Tool. It uses a language model to detect errors, and pinyin and shape features to correct Chinese text errors. It can be used for Chinese Pinyin and stroke input methods.
-
-Keywords: Chinese, Error correction tool, Language model, Pinyin
-
-## [nlpaug](https://github.com/makcedward/nlpaug)
-
-This python library helps you with augmenting nlp for machine learning projects. It is a lightweight library featuring synthetic data generation for improving model performance, support for audio and text, and compatibility with several ecosystems (scikit-learn, pytorch, tensorflow).
-
-Keywords: Data augmentation, Synthetic data generation, Audio, NLP
-
-## [dream-textures](https://github.com/carson-katri/dream-textures)
-
-[dream-textures](https://github.com/carson-katri/dream-textures) is a library targeted at bringing stable-diffusion support within Blender. It supports several use-cases, such as image generation, texture projection, inpainting/outpainting, ControlNet, and upscaling.
-
-Keywords: Stable-Diffusion, Blender
-
-## [seldon-core](https://github.com/SeldonIO/seldon-core)
-
-Seldon core converts your ML models (Tensorflow, Pytorch, H2o, etc.) or language wrappers (Python, Java, etc.) into production REST/GRPC microservices.
-Seldon handles scaling to thousands of production machine learning models and provides advanced machine learning capabilities out of the box including Advanced Metrics, Request Logging, Explainers, Outlier Detectors, A/B Tests, Canaries and more.
-
-Keywords: Microservices, Modeling, Language wrappers
-
-## [open_model_zoo](https://github.com/openvinotoolkit/open_model_zoo)
-
-This repository includes optimized deep learning models and a set of demos to expedite development of high-performance deep learning inference applications. Use these free pre-trained models instead of training your own models to speed-up the development and production deployment process.
-
-Keywords: Optimized models, Demos
-
-## [ml-stable-diffusion](https://github.com/apple/ml-stable-diffusion)
-
-ML-Stable-Diffusion is a repository by Apple bringing Stable Diffusion support to Core ML, on Apple Silicon devices. It supports stable diffusion checkpoints hosted on the Hugging Face Hub.
-
-Keywords: Stable Diffusion, Apple Silicon, Core ML
-
-## [stable-dreamfusion](https://github.com/ashawkey/stable-dreamfusion)
-
-Stable-Dreamfusion is a pytorch implementation of the text-to-3D model Dreamfusion, powered by the Stable Diffusion text-to-2D model.
-
-Keywords: Text-to-3D, Stable Diffusion
-
-## [txtai](https://github.com/neuml/txtai)
- 
-[txtai](https://github.com/neuml/txtai) is an open-source platform for semantic search and workflows powered by language models. txtai builds embeddings databases, which are a union of vector indexes and relational databases enabling similarity search with SQL. Semantic workflows connect language models together into unified applications.
-
-Keywords: Semantic search, LLM
-
-## [djl](https://github.com/deepjavalibrary/djl)
-
-Deep Java Library (DJL) is an open-source, high-level, engine-agnostic Java framework for deep learning. DJL is designed to be easy to get started with and simple to use for developers. DJL provides a native Java development experience and functions like any other regular Java library. DJL offers [a Java binding](https://github.com/deepjavalibrary/djl/tree/master/extensions/tokenizers) for HuggingFace Tokenizers and easy conversion toolkit for HuggingFace model to deploy in Java.
-
-Keywords: Java, Framework
-
-## [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/)
-
-This project provides a unified framework to test generative language models on a large number of different evaluation tasks. It has support for more than 200 tasks, and supports different ecosystems: HF Transformers, GPT-NeoX, DeepSpeed, as well as the OpenAI API.
-
-Keywords: LLM, Evaluation, Few-shot
-
-## [gpt-neox](https://github.com/EleutherAI/gpt-neox)
-
-This repository records EleutherAI's library for training large-scale language models on GPUs. The framework is based on NVIDIA's Megatron Language Model and has been augmented with techniques from DeepSpeed as well as some novel optimizations. It is focused on training multi-billion-parameter models.
-
-Keywords: Training, LLM, Megatron, DeepSpeed
-
-## [muzic](https://github.com/microsoft/muzic)
-
-Muzic is a research project on AI music that empowers music understanding and generation with deep learning and artificial intelligence. Muzic was created by researchers from Microsoft Research Asia.
-
-Keywords: Music understanding, Music generation
-
-## [dalle-flow](https://github.com/jina-ai/dalle-flow)
-
-DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt.
-The preferred candidate is fed to GLID-3 XL for diffusion, which often enriches the texture and background. Finally, the candidate is upscaled to 1024x1024 via SwinIR.
-
-Keywords: High-definition image generation, Stable Diffusion, DALL-E Mega, GLID-3 XL, CLIP, SwinIR
-
-## [lightseq](https://github.com/bytedance/lightseq)
-
-LightSeq is a high performance training and inference library for sequence processing and generation implemented in CUDA. It enables highly efficient computation of modern NLP and CV models such as BERT, GPT, Transformer, etc. It is therefore most useful for machine translation, text generation, image classification, and other sequence related tasks.
-
-Keywords: Training, Inference, Sequence Processing, Sequence Generation
-
-## [LaTeX-OCR](https://github.com/lukas-blecher/LaTeX-OCR)
-
-The goal of this project is to create a learning based system that takes an image of a math formula and returns corresponding LaTeX code.
-
-Keywords: OCR, LaTeX, Math formula
-
-## [open_clip](https://github.com/mlfoundations/open_clip)
-
-OpenCLIP is an open source implementation of OpenAI's CLIP.
-
-The goal of this repository is to enable training models with contrastive image-text supervision, and to investigate their properties such as robustness to distribution shift. 
-The starting point is an implementation of CLIP that matches the accuracy of the original CLIP models when trained on the same dataset. 
-
-Specifically, a ResNet-50 model trained with this codebase on OpenAI's 15 million image subset of YFCC achieves 32.7% top-1 accuracy on ImageNet.
-
-Keywords: CLIP, Open-source, Contrastive, Image-text
-
-## [dalle-playground](https://github.com/saharmor/dalle-playground)
-
-A playground to generate images from any text prompt using Stable Diffusion and Dall-E mini.
-
-Keywords: WebUI, Stable Diffusion, Dall-E mini
-
-## [FedML](https://github.com/FedML-AI/FedML)
-
-[FedML](https://github.com/FedML-AI/FedML) is a federated learning and analytics library enabling secure and collaborative machine learning on decentralized data anywhere at any scale.
-
-It supports large-scale cross-silo federated learning, and cross-device federated learning on smartphones/IoTs, and research simulation.
-
-Keywords: Federated Learning, Analytics, Collaborative ML, Decentralized
-
-## [gpt-code-clippy](https://github.com/CodedotAl/gpt-code-clippy)
-
-GPT-Code-Clippy (GPT-CC) is an open source version of GitHub Copilot, a language model -- based on GPT-3, called GPT-Codex -- that is fine-tuned on publicly available code from GitHub.
-
-Keywords: LLM, Code
-
-## [TextAttack](https://github.com/QData/TextAttack)
-
-[TextAttack](https://github.com/QData/TextAttack) 🐙 is a Python framework for adversarial attacks, data augmentation, and model training in NLP.
-
-Keywords: Adversarial attacks, Data augmentation, NLP
-
-## [OpenPrompt](https://github.com/thunlp/OpenPrompt)
-
-Prompt-learning is a paradigm to adapt pre-trained language models (PLMs) to downstream NLP tasks, which modifies the input text with a textual template and directly uses PLMs to conduct pre-trained tasks. This library provides a standard, flexible and extensible framework to deploy the prompt-learning pipeline. [OpenPrompt](https://github.com/thunlp/OpenPrompt) supports loading PLMs directly from https://github.com/huggingface/transformers.
-
-## [text-generation-webui](https://github.com/oobabooga/text-generation-webui/)
-
-[text-generation-webui](https://github.com/oobabooga/text-generation-webui/) is a Gradio Web UI for running Large Language Models like LLaMA, llama.cpp, GPT-J, Pythia, OPT, and GALACTICA.
-
-Keywords: LLM, WebUI
-
-## [libra](https://github.com/Palashio/libra)
-
-[libra](https://github.com/Palashio/libra) is an ergonomic machine learning library for non-technical users. It focuses on ergonomics and on ensuring that training a model is as simple as it can be.
-
-Keywords: Ergonomic, Non-technical
-
-## [alibi](https://github.com/SeldonIO/alibi)
-
-Alibi is an open source Python library aimed at machine learning model inspection and interpretation. The focus of the library is to provide high-quality implementations of black-box, white-box, local and global explanation methods for classification and regression models.
-
-Keywords: Model inspection, Model interpretation, Black-box, White-box
-
-## [tortoise-tts](https://github.com/neonbjb/tortoise-tts)
-
-Tortoise is a text-to-speech program built with the following priorities: strong multi-voice capabilities, and highly realistic prosody and intonation.
-
-Keywords: Text-to-speech
-
-## [flower](https://github.com/adap/flower)
-
-Flower (flwr) is a framework for building federated learning systems. The design of Flower is based on a few guiding principles: customizability, extendability, framework agnosticity, and ease-of-use.
-
-Keywords: Federated learning systems, Customizable, Extendable, Framework-agnostic, Simplicity
-
-## [fast-bert](https://github.com/utterworks/fast-bert)
-
-Fast-Bert is a deep learning library that allows developers and data scientists to train and deploy BERT and XLNet based models for natural language processing tasks beginning with Text Classification. It is aimed at simplicity.
-
-Keywords: Deployment, BERT, XLNet
-
-## [towhee](https://github.com/towhee-io/towhee)
-
-Towhee makes it easy to build neural data processing pipelines for AI applications. We provide hundreds of models, algorithms, and transformations that can be used as standard pipeline building blocks. Users can use Towhee's Pythonic API to build a prototype of their pipeline and automatically optimize it for production-ready environments.
-
-Keywords: Data processing pipeline, Optimization
-
-## [alibi-detect](https://github.com/SeldonIO/alibi-detect)
-
-Alibi Detect is an open source Python library focused on outlier, adversarial and drift detection. The package aims to cover both online and offline detectors for tabular data, text, images and time series. Both TensorFlow and PyTorch backends are supported for drift detection.
-
-Keywords: Adversarial, Outlier, Drift detection
-
-## [FARM](https://github.com/deepset-ai/FARM)
-
-[FARM](https://github.com/deepset-ai/FARM) makes Transfer Learning with BERT & Co simple, fast and enterprise-ready. It's built upon transformers and provides additional features to simplify the life of developers: Parallelized preprocessing, highly modular design, multi-task learning, experiment tracking, easy debugging and close integration with AWS SageMaker.
-
-Keywords: Transfer Learning, Modular design, Multi-task learning, Experiment tracking
-
-## [aitextgen](https://github.com/minimaxir/aitextgen)
-
-A robust Python tool for text-based AI training and generation using OpenAI's GPT-2 and EleutherAI's GPT Neo/GPT-3 architecture.
-[aitextgen](https://github.com/minimaxir/aitextgen) is a Python package that leverages PyTorch, Hugging Face Transformers and pytorch-lightning with specific optimizations for text generation using GPT-2, plus many added features.
-
-Keywords: Training, Generation
-
-## [diffgram](https://github.com/diffgram/diffgram)
-
-Diffgram aims to integrate human supervision into platforms. We support your team programmatically changing the UI (Schema, layout, etc.) like in Streamlit. This means that you can collect and annotate timely data from users. In other words, we are the platform behind your platform, an integrated part of your application, to ship new & better AI products faster.
-
-Keywords: Human supervision, Platform
-
-## [ecco](https://github.com/jalammar/ecco)
-
-Explain, analyze, and visualize NLP language models. Ecco creates interactive visualizations directly in Jupyter notebooks explaining the behavior of Transformer-based language models (like GPT2, BERT, RoBERTA, T5, and T0).
-
-Keywords: Model explainability
-
-## [s3prl](https://github.com/s3prl/s3prl)
-
-[s3prl](https://github.com/s3prl/s3prl) stands for Self-Supervised Speech Pre-training and Representation Learning. Self-supervised speech pre-trained models are called upstream in this toolkit, and are utilized in various downstream tasks.
-
-Keywords: Speech, Training
-
-## [ru-dalle](https://github.com/ai-forever/ru-dalle)
-
-RuDALL-E aims to be similar to DALL-E, targeted to Russian.
-
-Keywords: DALL-E, Russian
-
-## [DeepKE](https://github.com/zjunlp/DeepKE)
-
-[DeepKE](https://github.com/zjunlp/DeepKE) is a knowledge extraction toolkit for knowledge graph construction supporting cnSchema, low-resource, document-level and multimodal scenarios for entity, relation and attribute extraction.
-
-Keywords: Knowledge Extraction, Knowledge Graphs
-
-## [Nebuly](https://github.com/nebuly-ai/optimate)
-
-Nebuly is the next-generation platform to monitor and optimize your AI costs in one place. The platform connects to all your AI cost sources (compute, API providers, AI software licenses, etc) and centralizes them in one place to give you full visibility on a model basis. The platform also provides optimization recommendations and a co-pilot model that can guide during the optimization process. The platform builds on top of the open-source tools allowing you to optimize the different steps of your AI stack to squeeze out the best possible cost performances.
-
-Keywords: Optimization, Performance, Monitoring
-
-## [imaginAIry](https://github.com/brycedrennan/imaginAIry)
-
-Offers a CLI and a Python API to generate images with Stable Diffusion. It has support for many tools, like image structure control (controlnet), instruction-based image edits (InstructPix2Pix), prompt-based masking (clipseg), among others.
-
-Keywords: Stable Diffusion, CLI, Python API
-
-## [sparseml](https://github.com/neuralmagic/sparseml)
-
-SparseML is an open-source model optimization toolkit that enables you to create inference-optimized sparse models using pruning, quantization, and distillation algorithms. Models optimized with SparseML can then be exported to the ONNX and deployed with DeepSparse for GPU-class performance on CPU hardware.
-
-Keywords: Model optimization, Pruning, Quantization, Distillation
-
-## [opacus](https://github.com/pytorch/opacus)
-
-Opacus is a library that enables training PyTorch models with differential privacy. It supports training with minimal code changes required on the client, has little impact on training performance, and allows the client to online track the privacy budget expended at any given moment.
-
-Keywords: Differential privacy
-
-## [LAVIS](https://github.com/salesforce/LAVIS)
-
-[LAVIS](https://github.com/salesforce/LAVIS) is a Python deep learning library for LAnguage-and-VISion intelligence research and applications. This library aims to provide engineers and researchers with a one-stop solution to rapidly develop models for their specific multimodal scenarios, and benchmark them across standard and customized datasets. It features a unified interface design to access state-of-the-art image-language and video-language models and common datasets.
-
-Keywords: Multimodal, NLP, Vision
-
-## [buzz](https://github.com/chidiwilliams/buzz)
-
-Buzz transcribes and translates audio offline on your personal computer. Powered by OpenAI's Whisper.
-
-Keywords: Audio transcription, Translation
-
-## [rust-bert](https://github.com/guillaume-be/rust-bert)
-
-Rust-native state-of-the-art Natural Language Processing models and pipelines. Port of Hugging Face's Transformers library, using the tch-rs crate and pre-processing from rust-tokenizers. Supports multi-threaded tokenization and GPU inference. This repository exposes the model base architecture, task-specific heads and ready-to-use pipelines.
-
-Keywords: Rust, BERT, Inference
-
-## [EasyNLP](https://github.com/alibaba/EasyNLP)
-
-[EasyNLP](https://github.com/alibaba/EasyNLP) is an easy-to-use NLP development and application toolkit in PyTorch, first released inside Alibaba in 2021. It is built with scalable distributed training strategies and supports a comprehensive suite of NLP algorithms for various NLP applications. [EasyNLP](https://github.com/alibaba/EasyNLP) integrates knowledge distillation and few-shot learning for landing large pre-trained models, together with various popular multi-modality pre-trained models. It provides a unified framework of model training, inference, and deployment for real-world applications.
-
-Keywords: NLP, Knowledge distillation, Few-shot learning, Multi-modality, Training, Inference, Deployment
-
-## [TurboTransformers](https://github.com/Tencent/TurboTransformers)
-
-A fast and user-friendly runtime for transformer inference (Bert, Albert, GPT2, Decoders, etc) on CPU and GPU.
-
-Keywords: Optimization, Performance
-
-## [hivemind](https://github.com/learning-at-home/hivemind)
-
-Hivemind is a PyTorch library for decentralized deep learning across the Internet. Its intended usage is training one large model on hundreds of computers from different universities, companies, and volunteers.
-
-Keywords: Decentralized training
-
-## [docquery](https://github.com/impira/docquery)
-
-DocQuery is a library and command-line tool that makes it easy to analyze semi-structured and unstructured documents (PDFs, scanned images, etc.) using large language models (LLMs). You simply point DocQuery at one or more documents and specify a question you want to ask. DocQuery is created by the team at Impira.
-
-Keywords: Semi-structured documents, Unstructured documents, LLM, Document Question Answering
-
-## [CodeGeeX](https://github.com/THUDM/CodeGeeX)
-
-[CodeGeeX](https://github.com/THUDM/CodeGeeX) is a large-scale multilingual code generation model with 13 billion parameters, pre-trained on a large code corpus of more than 20 programming languages. It has several unique features:
- Multilingual code generation
- Crosslingual code translation
- Is a customizable programming assistant
-
-Keywords: Code Generation Model
-
-## [ktrain](https://github.com/amaiya/ktrain)
-
-[ktrain](https://github.com/amaiya/ktrain) is a lightweight wrapper for the deep learning library TensorFlow Keras (and other libraries) to help build, train, and deploy neural networks and other machine learning models. Inspired by ML framework extensions like fastai and ludwig, [ktrain](https://github.com/amaiya/ktrain) is designed to make deep learning and AI more accessible and easier to apply for both newcomers and experienced practitioners.
-
-Keywords: Keras wrapper, Model building, Training, Deployment
-
-## [FastDeploy](https://github.com/PaddlePaddle/FastDeploy)
-
-[FastDeploy](https://github.com/PaddlePaddle/FastDeploy) is an easy-to-use and high-performance AI model deployment toolkit for Cloud, Mobile and Edge, with an out-of-the-box and unified experience and end-to-end optimization for 160+ Text, Vision, Speech and Cross-modal AI models. It covers image classification, object detection, OCR, face detection, matting, pp-tracking, NLP, stable diffusion, TTS and other tasks to meet developers' industrial deployment needs for multi-scenario, multi-hardware and multi-platform use.
-
-Keywords: Model deployment, Cloud, Mobile, Edge
-
-## [underthesea](https://github.com/undertheseanlp/underthesea)
-
-[underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules, data sets, and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide an extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing.
-
-Keywords: Vietnamese, NLP
-
-## [hasktorch](https://github.com/hasktorch/hasktorch)
-
-Hasktorch is a library for tensors and neural networks in Haskell. It is an independent open source community project which leverages the core C++ libraries shared by PyTorch.
-
-Keywords: Haskell, Neural Networks
-
-## [donut](https://github.com/clovaai/donut)
-
-Donut, or Document understanding transformer, is a new method of document understanding that utilizes an OCR-free end-to-end Transformer model.
-
-Donut does not require off-the-shelf OCR engines/APIs, yet it shows state-of-the-art performances on various visual document understanding tasks, such as visual document classification or information extraction (a.k.a. document parsing).
-
-Keywords: Document Understanding
-
-## [transformers-interpret](https://github.com/cdpierse/transformers-interpret)
-
-Transformers Interpret is a model explainability tool designed to work exclusively with the transformers package.
-
-In line with the philosophy of the Transformers package, Transformers Interpret allows any transformers model to be explained in just two lines. Explainers are available for both text and computer vision models. Visualizations are also available in notebooks and as savable png and html files.
-
-Keywords: Model interpretation, Visualization
-
-## [mlrun](https://github.com/mlrun/mlrun)
-
-MLRun is an open MLOps platform for quickly building and managing continuous ML applications across their lifecycle. MLRun integrates into your development and CI/CD environment and automates the delivery of production data, ML pipelines, and online applications, significantly reducing engineering efforts, time to production, and computation resources. With MLRun, you can choose any IDE on your local machine or on the cloud. MLRun breaks the silos between data, ML, software, and DevOps/MLOps teams, enabling collaboration and fast continuous improvements.
-
-Keywords: MLOps
-
-## [FederatedScope](https://github.com/alibaba/FederatedScope)
-
-[FederatedScope](https://github.com/alibaba/FederatedScope) is a comprehensive federated learning platform that provides convenient usage and flexible customization for various federated learning tasks in both academia and industry. Based on an event-driven architecture, [FederatedScope](https://github.com/alibaba/FederatedScope) integrates rich collections of functionalities to satisfy the burgeoning demands from federated learning, and aims to build up an easy-to-use platform for promoting learning safely and effectively.
-
-Keywords: Federated learning, Event-driven
-
-## [pythainlp](https://github.com/PyThaiNLP/pythainlp)
-
-PyThaiNLP is a Python package for text processing and linguistic analysis, similar to NLTK with focus on Thai language.
-
-Keywords: Thai, NLP, NLTK
-
-## [FlagAI](https://github.com/FlagAI-Open/FlagAI)
-
-[FlagAI](https://github.com/FlagAI-Open/FlagAI) (Fast LArge-scale General AI models) is a fast, easy-to-use and extensible toolkit for large-scale models. Our goal is to support training, fine-tuning, and deployment of large-scale models on various downstream tasks with multi-modality.
-
-Keywords: Large models, Training, Fine-tuning, Deployment, Multi-modal
-
-## [pyserini](https://github.com/castorini/pyserini)
-
-[pyserini](https://github.com/castorini/pyserini) is a Python toolkit for reproducible information retrieval research with sparse and dense representations. Retrieval using sparse representations is provided via integration with the group's Anserini IR toolkit. Retrieval using dense representations is provided via integration with Facebook's Faiss library.
-
-Keywords: IR, Information Retrieval, Dense, Sparse
-
-## [baal](https://github.com/baal-org/baal)
-
-[baal](https://github.com/baal-org/baal) is an active learning library that supports both industrial applications and research use cases. [baal](https://github.com/baal-org/baal) currently supports Monte-Carlo Dropout, MCDropConnect, deep ensembles, and semi-supervised learning.
-
-Keywords: Active Learning, Research, Labeling
-
-## [cleanlab](https://github.com/cleanlab/cleanlab)
-
-[cleanlab](https://github.com/cleanlab/cleanlab) is the standard data-centric AI package for data quality and machine learning with messy, real-world data and labels. For text, image, tabular, audio (among others) datasets, you can use cleanlab to automatically: detect data issues (outliers, label errors, near duplicates, etc), train robust ML models, infer consensus + annotator-quality for multi-annotator data, suggest data to (re)label next (active learning).
-
-Keywords: Data-Centric AI, Data Quality, Noisy Labels, Outlier Detection, Active Learning  
-
-## [BentoML](https://github.com/bentoml/BentoML)
-
-[BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models. 
-All Hugging Face models and pipelines can be seamlessly integrated into BentoML applications, enabling the running of models on the most suitable hardware and independent scaling based on usage.
-
-Keywords: BentoML, Framework, Deployment, AI Applications
-
-## [LLaMA Factory](https://github.com/hiyouga/LLaMA-Factory)
-
-[LLaMA Factory](https://github.com/hiyouga/LLaMA-Factory) offers a user-friendly fine-tuning framework that incorporates PEFT. The repository includes training (fine-tuning) and inference examples for LLaMA-2, BLOOM, Falcon, Baichuan, Qwen, and other LLMs. A ChatGLM version is also available in [ChatGLM-Efficient-Tuning](https://github.com/hiyouga/ChatGLM-Efficient-Tuning).
-
-Keywords: PEFT, fine-tuning, LLaMA-2, ChatGLM, Qwen
-
--- a/benchmark/README.md
+++ b/benchmark/README.md
@ -1,49 +0,0 @@
-# Benchmarks
-
-You might want to add new benchmarks.
-
-You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory.
-
-The expected function signature is the following:
-
-```py
-def run_benchmark(logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
-```
-
-## Writing metrics to the database
-
-`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
-
-See [`llama.py`](./llama.py) for an example of this in practice.
-
-```py
-from benchmarks_entrypoint import MetricsRecorder
-import psycopg2
-
-def run_benchmark(logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
-    metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg)
-    benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
-    # To collect device measurements
-    metrics_recorder.collect_device_measurements(
-        benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
-    )
-    # To collect your model measurements
-    metrics_recorder.collect_model_measurements(
-        benchmark_id,
-        {
-            "model_load_time": model_load_time,
-            "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
-            "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
-            "first_eager_generate_time_secs": first_eager_generate_time,
-            "second_eager_generate_time_secs": second_eager_generate_time,
-            "time_to_first_token_secs": time_to_first_token,
-            "time_to_second_token_secs": time_to_second_token,
-            "time_to_third_token_secs": time_to_third_token,
-            "time_to_next_token_mean_secs": mean_time_to_next_token,
-            "first_compile_generate_time_secs": first_compile_generate_time,
-            "second_compile_generate_time_secs": second_compile_generate_time,
-            "third_compile_generate_time_secs": third_compile_generate_time,
-            "fourth_compile_generate_time_secs": fourth_compile_generate_time,
-        },
-    )
-```
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@ -1,326 +0,0 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Run benchmark using the `optimum-benchmark` library with some customization in `transformers`.
-
-Assume we are under `transformers` root directory: (make sure the commits are valid commits)
-```bash
-python benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=9b9c7f03da625b13643e99205c691fe046461724 --metrics=decode.latency.mean,per_token.latency.mean,per_token.throughput.value backend.model=google/gemma-2b benchmark.input_shapes.sequence_length=5,7 benchmark.input_shapes.batch_size=1,2 --multirun
-```
-"""
-
-import argparse
-import glob
-import json
-import os.path
-import re
-import tempfile
-from contextlib import contextmanager
-from pathlib import Path
-
-from git import Repo
-
-from huggingface_hub import HfApi
-
-from optimum_benchmark import Benchmark
-from optimum_benchmark_wrapper import main
-
-
-PATH_TO_REPO = Path(__file__).parent.parent.resolve()
-
-
-@contextmanager
-def checkout_commit(repo: Repo, commit_id: str):
-    """
-    Temporarily check out `commit_id` in `repo`, restoring the previously checked-out reference afterwards.
-
-    Args:
-        repo (`git.Repo`): The git repository to operate on (for instance the Transformers repo).
-        commit_id (`str`): The commit reference to check out for the duration of the `with` block.
-    """
-    # Remember where HEAD points: the commit itself when detached, otherwise the branch reference.
-    if repo.head.is_detached:
-        previous_reference = repo.head.commit
-    else:
-        previous_reference = repo.head.ref
-
-    try:
-        repo.git.checkout(commit_id)
-        yield
-    finally:
-        # Runs on normal exit and on exceptions raised inside the `with` block.
-        repo.git.checkout(previous_reference)
-
-
-def summarize(run_dir, metrics, expand_metrics=False):
-    """Produce a summary for each optimum-benchmark launched job's output directory found in `run_dir`.
-
-    Each summary's format is as follows (for `expand_metrics=False`):
-    ```
-    {
-        "model": "google/gemma-2b",
-        "commit": "3cd6ed22e4d49219f300f5055e71e3929aba20d7",
-        "config": "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5",
-        "metrics": {
-            "decode.latency.mean": 1.624666809082031,
-            "per_token.latency.mean": 0.012843788806628804,
-            "per_token.throughput.value": 77.85864553330948
-        }
-    }
-    ```
-
-    With `expand_metrics=True`, the dotted metric names are expanded into nested dictionaries instead
-    (e.g. `{"decode": {"latency": {"mean": ...}}}`).
-
-    Args:
-        run_dir (`str`): Directory searched recursively for `benchmark_report.json` files. Its path is expected to
-            contain a `/commit=<sha>` component.
-        metrics (`list[str]`): Dotted paths into the report dictionary (e.g. `decode.latency.mean`).
-        expand_metrics (`bool`, defaults to `False`): Whether to store metrics as nested dictionaries.
-
-    Returns:
-        `list[dict]`: One summary per report directory. Each summary is also written to `summary.json` in its
-        report directory and printed to stdout.
-    """
-    reports = glob.glob(os.path.join(run_dir, "**/benchmark_report.json"), recursive=True)
-    report_dirs = [str(Path(report).parent) for report in reports]
-
-    summaries = []
-    for report_dir in report_dirs:
-        commit = re.search(r"/commit=([^/]+)", report_dir).groups()[0]
-
-        if not os.path.isfile(os.path.join(report_dir, "benchmark.json")):
-            continue
-        benchmark = Benchmark.from_json(os.path.join(report_dir, "benchmark.json"))
-        report = benchmark.report
-
-        model = benchmark.config.backend["model"]
-
-        # This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
-        # (we rely on the usage of hydra's `${hydra.job.override_dirname}`.)
-        # The model name is escaped so that regex metacharacters in it are matched literally.
-        benchmark_name = re.sub(f"backend.model={re.escape(model)},*", "", report_dir)
-        benchmark_name = str(Path(benchmark_name).parts[-1])
-        if benchmark_name.startswith("commit="):
-            benchmark_name = benchmark.config.name
-
-        metrics_values = {}
-        # post-processing of report: show a few selected/important metric
-        for metric in metrics:
-            keys = metric.split(".")
-            value = report.to_dict()
-            current = metrics_values
-            for key in keys:
-                # Avoid KeyError when a user's specified metric has typo.
-                # TODO: Give warnings.
-                if key not in value:
-                    continue
-                value = value[key]
-
-                if expand_metrics:
-                    if isinstance(value, dict):
-                        # Always descend into the nested level, including when `key` was already created by a
-                        # previous metric sharing the same prefix (e.g. `decode.latency.mean` followed by
-                        # `decode.throughput.value`). Otherwise later metrics end up at the wrong depth.
-                        current = current.setdefault(key, {})
-                    else:
-                        current[key] = value
-
-            if not expand_metrics:
-                metrics_values[metric] = value
-
-        # show some config information
-        print(f"model: {model}")
-        print(f"commit: {commit}")
-        print(f"config: {benchmark_name}")
-        if len(metrics_values) > 0:
-            print("metrics:")
-            if expand_metrics:
-                print(metrics_values)
-            else:
-                for metric, value in metrics_values.items():
-                    print(f"  - {metric}: {value}")
-        print("-" * 80)
-
-        summary = {
-            "model": model,
-            "commit": commit,
-            "config": benchmark_name,
-            "metrics": metrics_values,
-        }
-        summaries.append(summary)
-
-        with open(os.path.join(report_dir, "summary.json"), "w") as fp:
-            json.dump(summary, fp, indent=4)
-
-    return summaries
-
-
-def combine_summaries(summaries, output_dir=None):
-    """Combine a list of summary obtained from the function `summarize`.
-
-    The combined summary's format is as follows:
-    ```
-    "google/gemma-2b": {
-        "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5": {
-            "3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
-                "metrics": {"decode.latency.mean": 1.624666809082031}
-            },
-            "c97ee28b117c0abe8e08891f402065e4df6d72aa": {
-                "metrics": {"decode.latency.mean": 1.6278163452148438}
-            }
-        },
-        "benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=5": {
-            "3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
-                "metrics": {"decode.latency.mean": 1.6947791748046876}
-            },
-            "c97ee28b117c0abe8e08891f402065e4df6d72aa": {
-                "metrics": {
-                    "decode.latency.mean": 1.6980519409179688}
-            }
-        }
-    }
-    ```
-
-    Args:
-        summaries (`list[dict]`): Summaries with the keys `model`, `config`, `commit` and `metrics`.
-        output_dir (`str`, *optional*): Directory in which `summary.json` is written. When omitted, the
-            script-level `exp_run_dir` is used if it exists; if neither is available, no file is written.
-
-    Returns:
-        `dict`: Metrics grouped as `model -> config -> commit`. For duplicated (model, config, commit) entries the
-        first summary wins. The result is also printed to stdout as JSON.
-    """
-    if output_dir is None:
-        # Backward compatibility: the function used to read the `exp_run_dir` global set by the `__main__` block,
-        # which raised a `NameError` when called from anywhere else.
-        output_dir = globals().get("exp_run_dir")
-
-    combined = {}
-    for summary in summaries:
-        per_commit = combined.setdefault(summary["model"], {}).setdefault(summary["config"], {})
-        # `setdefault` keeps the first entry seen for a given commit.
-        per_commit.setdefault(summary["commit"], {"metrics": summary["metrics"]})
-
-    if output_dir is not None:
-        with open(os.path.join(output_dir, "summary.json"), "w") as fp:
-            json.dump(combined, fp, indent=4)
-
-    print(json.dumps(combined, indent=4))
-
-    return combined
-
-
-if __name__ == "__main__":
-
-    def list_str(values):
-        # Split a comma-separated command line value into a list of strings.
-        return values.split(",")
-
-    def str_to_bool(value):
-        # `type=bool` would turn every non-empty string (including "False") into `True`. Only the usual
-        # "false-like" spellings disable the flag; any other value keeps it enabled, as before.
-        return value.strip().lower() not in {"", "0", "false", "no", "n", "off"}
-
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
-    parser.add_argument("--config-name", type=str, required=True, help="The config name.")
-
-    # arguments specific to this wrapper for our own customization
-    parser.add_argument("--ensure_empty", type=str_to_bool, default=True, help="If to create a temporary directory.")
-    parser.add_argument(
-        "--commit",
-        type=list_str,
-        default="",
-        help="Comma-separated list of branch names and/or commit sha values on which the benchmark will run. If `diff` is specified, it will run on both the current head and the `main` branch.",
-    )
-    parser.add_argument("--metrics", type=str, help="The metrics to be included in the summary.")
-
-    parser.add_argument("--repo_id", type=str, default=None, help="The repository to which the file will be uploaded.")
-    parser.add_argument("--path_in_repo", type=str, default=None, help="Relative filepath in the repo.")
-    parser.add_argument("--token", type=str, default=None, help="A valid user access token (string).")
-
-    # Arguments not declared above are forwarded untouched to `optimum-benchmark`.
-    args, optimum_benchmark_args = parser.parse_known_args()
-
-    repo = Repo(PATH_TO_REPO)
-
-    metrics = [
-        "prefill.latency.mean",
-        "prefill.throughput.value",
-        "decode.latency.mean",
-        "decode.throughput.value",
-        "per_token.latency.mean",
-        "per_token.throughput.value",
-    ]
-    if args.metrics is not None:
-        metrics = args.metrics.split(",")
-
-    # Get `backend.model` in a hacky way: We want to control the experiment flow manually.
-    models = [""]
-    for idx, arg in enumerate(optimum_benchmark_args):
-        if arg.startswith("backend.model="):
-            models = arg[len("backend.model=") :]
-            models = models.split(",")
-            break
-    optimum_benchmark_args = [arg for arg in optimum_benchmark_args if not arg.startswith("backend.model=")]
-
-    # Get the commit(s)
-    current_head = str(repo.head.commit) if repo.head.is_detached else str(repo.head.ref)
-    commits = [x for x in args.commit if x != ""]
-    if len(commits) == 0:
-        commits = [current_head]
-    elif len(commits) == 1 and commits[0] == "diff":
-        # compare to `main`
-        commits = ["main", current_head]
-
-    # Get the specified run directory
-    run_dir_arg_idx, run_dir = -1, None
-    sweep_dir_arg_idx, sweep_dir = -1, None
-    for idx, arg in enumerate(optimum_benchmark_args):
-        if arg.startswith("hydra.run.dir="):
-            run_dir = arg[len("hydra.run.dir=") :]
-            run_dir_arg_idx = idx
-        elif arg.startswith("hydra.sweep.dir="):
-            sweep_dir = arg[len("hydra.sweep.dir=") :]
-            sweep_dir_arg_idx = idx
-    exp_run_dir, arg_dix, arg_name = (
-        (sweep_dir, sweep_dir_arg_idx, "hydra.sweep.dir")
-        if "--multirun" in optimum_benchmark_args
-        else (run_dir, run_dir_arg_idx, "hydra.run.dir")
-    )
-
-    # TODO: not hardcoded
-    if exp_run_dir is None and args.ensure_empty:
-        exp_run_dir = "_benchmark"
-
-    if args.ensure_empty:
-        os.makedirs(exp_run_dir, exist_ok=True)
-        exp_run_dir = tempfile.mkdtemp(dir=exp_run_dir)
-
-    run_summaries = []
-    for commit in commits:
-        with checkout_commit(repo, commit):
-            commit = str(repo.head.commit)
-
-            commit_run_dir = exp_run_dir
-            if exp_run_dir is not None:
-                commit_run_dir = os.path.join(exp_run_dir, rf"commit\={commit}")
-
-            print(f"Run benchmark on commit: {commit}")
-
-            for model in models:
-                model_arg = [f"backend.model={model}"] if model != "" else []
-                dir_args = []
-                if commit_run_dir is not None:
-                    if arg_dix > -1:
-                        optimum_benchmark_args[arg_dix] = f"{arg_name}={commit_run_dir}"
-                    else:
-                        dir_args = [
-                            f"hydra.sweep.dir={commit_run_dir}",
-                            f"hydra.run.dir={commit_run_dir}/" + "${hydra.job.override_dirname}",
-                        ]
-                main(args.config_dir, args.config_name, model_arg + dir_args + optimum_benchmark_args)
-
-            if commit_run_dir is not None:
-                # Need to remove the `\` character
-                summaries = summarize(commit_run_dir.replace("\\", ""), metrics)
-                run_summaries.extend(summaries)
-
-    # aggregate the information across the commits
-    if exp_run_dir is not None:
-        with open(os.path.join(exp_run_dir, "summaries.json"), "w") as fp:
-            json.dump(run_summaries, fp, indent=4)
-
-        combined_summary = combine_summaries(run_summaries)
-
-        if args.repo_id is not None and args.path_in_repo is not None:
-            # Upload to Hub
-            api = HfApi()
-            api.upload_folder(
-                folder_path=exp_run_dir,
-                path_in_repo=args.path_in_repo,
-                repo_id=args.repo_id,
-                repo_type="dataset",
-                token=args.token,
-            )
--- a/benchmark/benchmarks_entrypoint.py
+++ b/benchmark/benchmarks_entrypoint.py
@ -1,152 +0,0 @@
-import argparse
-import importlib.util
-import logging
-import os
-import sys
-from typing import Dict, Tuple
-
-from psycopg2.extensions import register_adapter
-from psycopg2.extras import Json
-
-
-register_adapter(dict, Json)
-
-
-class ImportModuleException(Exception):
-    pass
-
-
-class MetricsRecorder:
-    """
-    Writes benchmark runs and their measurements to the database behind the given connection.
-
-    All statements use `%s` placeholders with the values passed separately to `cursor.execute`.
-    """
-
-    def __init__(
-        self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str
-    ):
-        """
-        Args:
-            connection: Database connection used for every insert; `autocommit` is switched on.
-            logger (`logging.Logger`): Logger receiving a debug message for each insert.
-            repository (`str`): Stored in the `repository` column of the `benchmarks` table.
-            branch (`str`): Stored in the `branch` column.
-            commit_id (`str`): Stored in the `commit_id` column.
-            commit_msg (`str`): Stored in the `commit_message` column.
-        """
-        self.conn = connection
-        self.conn.autocommit = True
-        self.logger = logger
-        self.repository = repository
-        self.branch = branch
-        self.commit_id = commit_id
-        self.commit_msg = commit_msg
-
-    def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
-        """
-        Creates a new benchmark, returns the benchmark id
-        """
-        # gpu_name: str, model_id: str
-        with self.conn.cursor() as cur:
-            cur.execute(
-                "INSERT INTO benchmarks (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
-                (self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
-            )
-            benchmark_id = cur.fetchone()[0]
-            # Use the logger handed to this instance (like the other methods) rather than a module-level global.
-            self.logger.debug(f"initialised benchmark #{benchmark_id}")
-            return benchmark_id
-
-    def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
-        """
-        Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
-        """
-        with self.conn.cursor() as cur:
-            cur.execute(
-                "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
-                (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
-            )
-        self.logger.debug(
-            f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
-        )
-
-    def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
-        """
-        Insert one row of model measurements (a name -> value mapping) for the given benchmark.
-        """
-        with self.conn.cursor() as cur:
-            cur.execute(
-                """
-                INSERT INTO model_measurements (
-                    benchmark_id,
-                    measurements
-                ) VALUES (%s, %s)
-                """,
-                (
-                    benchmark_id,
-                    measurements,
-                ),
-            )
-        self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")
-
-    def close(self):
-        """Close the underlying database connection."""
-        self.conn.close()
-
-
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-
-handler = logging.StreamHandler(sys.stdout)
-handler.setLevel(logging.INFO)
-formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
-handler.setFormatter(formatter)
-logger.addHandler(handler)
-
-
-def parse_arguments() -> Tuple[str, str, str, str]:
-    """
-    Read the four positional CLI arguments of the benchmarking entrypoint.
-
-    Returns:
-        The tuple `(repository, branch, commit_id, commit_msg)` taken from the command line.
-    """
-    # Positional arguments, in the order they are expected on the command line.
-    positional_arguments = (
-        ("repository", "The repository name on which the benchmarking is performed."),
-        ("branch", "The branch name on which the benchmarking is performed."),
-        ("commit_id", "The commit hash on which the benchmarking is performed."),
-        ("commit_msg", "The commit message associated with the commit, truncated to 70 characters."),
-    )
-
-    cli = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
-    for argument_name, help_text in positional_arguments:
-        cli.add_argument(argument_name, type=str, help=help_text)
-
-    parsed = cli.parse_args()
-    return parsed.repository, parsed.branch, parsed.commit_id, parsed.commit_msg
-
-
-def import_from_path(module_name, file_path):
-    """
-    Load the python file at `file_path` as a module registered under `module_name`.
-
-    Args:
-        module_name: Name under which the module is stored in `sys.modules`.
-        file_path: Path of the python source file to execute.
-
-    Returns:
-        The loaded module object.
-
-    Raises:
-        ImportModuleException: If the module cannot be created or its code raises while being executed.
-    """
-    module = None
-    try:
-        spec = importlib.util.spec_from_file_location(module_name, file_path)
-        module = importlib.util.module_from_spec(spec)
-        # Registered before execution so the module can be found in `sys.modules` while its code runs.
-        sys.modules[module_name] = module
-        spec.loader.exec_module(module)
-        return module
-    except Exception as e:
-        # Do not leave a half-initialised module behind; only remove the entry this call created.
-        if module is not None and sys.modules.get(module_name) is module:
-            del sys.modules[module_name]
-        raise ImportModuleException(f"failed to load python module: {e}") from e
-
-
-if __name__ == "__main__":
-    # Resolve this file's path once so the self-check below compares two resolved paths.
-    this_file = os.path.realpath(__file__)
-    benchmarks_folder_path = os.path.dirname(this_file)
-
-    repository, branch, commit_id, commit_msg = parse_arguments()
-
-    # Every other python file in this folder is loaded and asked to run its benchmark.
-    for entry in os.scandir(benchmarks_folder_path):
-        try:
-            if not entry.name.endswith(".py"):
-                continue
-            # `entry.path` is built from the resolved folder, while `__file__` may be relative or a symlink,
-            # so comparing against the raw `__file__` could fail to skip this entrypoint itself.
-            if os.path.realpath(entry.path) == this_file:
-                continue
-            logger.debug(f"loading: {entry.name}")
-            module = import_from_path(entry.name.split(".")[0], entry.path)
-            logger.info(f"running benchmarks in: {entry.name}")
-            module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
-        except ImportModuleException as e:
-            logger.error(e)
-        except Exception as e:
-            # One failing benchmark file must not prevent the remaining ones from running.
-            logger.error(f"error running benchmarks for {entry.name}: {e}")
--- a/benchmark/config/generation.yaml
+++ b/benchmark/config/generation.yaml
@ -1,57 +0,0 @@
-defaults:
-  - benchmark # inheriting benchmark schema
-  - scenario: inference
-  - launcher: process
-  - backend: pytorch
-  - _self_ # for hydra 1.1 compatibility
-
-name: pytorch_generate
-
-launcher:
-  start_method: spawn
-  device_isolation: true
-  device_isolation_action: warn
-
-backend:
-  device: cuda
-  device_ids: 0
-  no_weights: true
-  model: meta-llama/Llama-2-7b-hf
-  cache_implementation: static
-  torch_compile: true
-  torch_dtype: float16
-  torch_compile_config:
-    backend: inductor
-    mode: reduce-overhead
-    fullgraph: true
-
-scenario:
-  input_shapes:
-    batch_size: 1
-    sequence_length: 7
-  generate_kwargs:
-    max_new_tokens: 128
-    min_new_tokens: 128
-    do_sample: false
-  memory: true
-  latency: true
-  iterations: 2
-  duration: 0
-
-
-# hydra/cli specific settings
-hydra:
-  run:
-    # where to store run results
-    dir: runs/${name}
-  job:
-    # change working directory to the run directory
-    chdir: true
-    env_set:
-      # set environment variable OVERRIDE_BENCHMARKS to 1
-      # to not skip benchmarks that have been run before
-      OVERRIDE_BENCHMARKS: 1
-      LOG_LEVEL: WARN
-  sweep:
-    dir: multirun
-    subdir: ${hydra.job.override_dirname}
--- a/benchmark/default.yml
+++ b/benchmark/default.yml
@ -1,10 +0,0 @@
-apiVersion: 1
-
-providers:
-  - name: 'Transformers Benchmarks'
-    orgId: 1
-    type: file
-    updateIntervalSeconds: 10
-    allowUiUpdates: true
-    options:
-      path: /etc/grafana/dashboards
--- a/benchmark/grafana_dashboard.json
+++ b/benchmark/grafana_dashboard.json
--- a/benchmark/grafana_datasource.yaml
+++ b/benchmark/grafana_datasource.yaml
@ -1,17 +0,0 @@
-apiVersion: 1
-datasources:
-  - name: grafana-postgresql-datasource
-    uid: be28nkzirtb0gd
-    type: postgres
-    url: $GRAFANA_POSTGRES_DATASOURCE_URL
-    user: $GRAFANA_POSTGRES_DATASOURCE_USER
-    secureJsonData:
-      password: $GRAFANA_POSTGRES_DATASOURCE_PWD
-    jsonData:
-      database: metrics
-      maxOpenConns: 100
-      maxIdleConns: 100
-      maxIdleConnsAuto: true
-      connMaxLifetime: 14400
-      postgresVersion: 1000
-      timescaledb: false
--- a/benchmark/init_db.sql
+++ b/benchmark/init_db.sql
@ -1,34 +0,0 @@
-CREATE TABLE IF NOT EXISTS benchmarks (
-  benchmark_id SERIAL PRIMARY KEY,
-  repository VARCHAR(255),
-  branch VARCHAR(255),
-  commit_id VARCHAR(72),
-  commit_message VARCHAR(70),
-  metadata jsonb,
-  created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
-);
-
-CREATE INDEX IF NOT EXISTS benchmarks_benchmark_id_idx ON benchmarks (benchmark_id);
-
-CREATE INDEX IF NOT EXISTS benchmarks_branch_idx ON benchmarks (branch);
-
-CREATE TABLE IF NOT EXISTS device_measurements (
-  measurement_id SERIAL PRIMARY KEY,
-  benchmark_id int REFERENCES benchmarks (benchmark_id),
-  cpu_util double precision,
-  mem_megabytes double precision,
-  gpu_util double precision,
-  gpu_mem_megabytes double precision,
-  time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
-);
-
-CREATE INDEX IF NOT EXISTS device_measurements_branch_idx ON device_measurements (benchmark_id);
-
-CREATE TABLE IF NOT EXISTS model_measurements (
-  measurement_id SERIAL PRIMARY KEY,
-  benchmark_id int REFERENCES benchmarks (benchmark_id),
-  measurements jsonb,
-  time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
-);
-
-CREATE INDEX IF NOT EXISTS model_measurements_branch_idx ON model_measurements (benchmark_id);
--- a/benchmark/llama.py
+++ b/benchmark/llama.py
@ -1,346 +0,0 @@
-from logging import Logger
-import os
-from threading import Event, Thread
-from time import perf_counter, sleep
-from typing import Optional
-from benchmarks_entrypoint import MetricsRecorder
-import gpustat
-import psutil
-import psycopg2
-import torch
-
-from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
-
-
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-
-os.environ["TOKENIZERS_PARALLELISM"] = "1"
-torch.set_float32_matmul_precision("high")
-
-
-def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
-    """Sample process and GPU usage in a loop and hand each sample to `metrics_recorder`.
-
-    Args:
-        benchmark_id: identifier forwarded unchanged to `collect_device_measurements`.
-        continue_metric_collection: event-like object. Despite its name, the loop keeps
-            running while it is NOT set and exits once it has been set.
-        metrics_recorder: object exposing `collect_device_measurements(...)`.
-    """
-    p = psutil.Process(os.getpid())
-    while not continue_metric_collection.is_set():
-        with p.oneshot():
-            cpu_util = p.cpu_percent()
-            # resident set size, converted from bytes to MiB
-            mem_megabytes = p.memory_info().rss / (1024 * 1024)
-        gpu_stats = gpustat.GPUStatCollection.new_query()
-        # only the first GPU returned by the query is sampled
-        gpu_util = gpu_stats[0]["utilization.gpu"]
-        gpu_mem_megabytes = gpu_stats[0]["memory.used"]
-        metrics_recorder.collect_device_measurements(
-            benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
-        )
-        # pause 10 ms between samples
-        sleep(0.01)
-
-
-def run_benchmark(
-    logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100
-):
-    """Benchmark `meta-llama/Llama-2-7b-hf` on CUDA and record the timings.
-
-    Measures model load time, eager forward/generate passes, compiled single-token
-    decoding and `generate` with a static cache, then stores all timings through
-    `MetricsRecorder`. A background thread samples device usage while the benchmark runs.
-
-    Args:
-        logger: logger used for progress and error messages.
-        repository: forwarded to `MetricsRecorder`.
-        branch: forwarded to `MetricsRecorder`.
-        commit_id: forwarded to `MetricsRecorder`.
-        commit_msg: forwarded to `MetricsRecorder`.
-        num_tokens_to_generate: number of new tokens used to size the caches and the decoding loop.
-
-    Exceptions raised while benchmarking are logged rather than propagated. The metrics
-    thread is stopped and the recorder is closed in all cases.
-    """
-    continue_metric_collection = Event()
-    metrics_thread = None
-    model_id = "meta-llama/Llama-2-7b-hf"
-    metrics_recorder = MetricsRecorder(
-        psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg
-    )
-    try:
-        gpu_stats = gpustat.GPUStatCollection.new_query()
-        gpu_name = gpu_stats[0]["name"]
-        benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
-        logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
-        metrics_thread = Thread(
-            target=collect_metrics,
-            args=[benchmark_id, continue_metric_collection, metrics_recorder],
-        )
-        metrics_thread.start()
-        logger.info("started background thread to fetch device metrics")
-
-        os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling
-
-        device = "cuda"
-
-        logger.info("downloading weights")
-        # This is to avoid counting download in model load time measurement
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
-        gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
-        logger.info("loading model")
-        start = perf_counter()
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id, torch_dtype=torch.float16, generation_config=gen_config
-        ).eval()
-        model.to(device)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        model_load_time = end - start
-        logger.info(f"loaded model in: {model_load_time}s")
-
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-        prompt = "Why dogs are so cute?"
-        inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-        # Specify the max length (including both the prompt and the response)
-        # When calling `generate` with `cache_implementation="static"` later, this is also used to create a
-        # `StaticCache` object with sequence length = `max_length`. The longer the more you will re-use it
-        seq_length = inputs["input_ids"].shape[1]
-        model.generation_config.max_length = seq_length + num_tokens_to_generate
-        batch_size = inputs["input_ids"].shape[0]
-
-        # Copied from the gpt-fast repo
-        def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a cuda synchronization
-            q = torch.empty_like(probs_sort).exponential_(1)
-            return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
-
-        def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
-            logits = logits / max(temperature, 1e-5)
-
-            if top_k is not None:
-                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
-                pivot = v.select(-1, -1).unsqueeze(-1)
-                logits = torch.where(logits < pivot, -float("Inf"), logits)
-            probs = torch.nn.functional.softmax(logits, dim=-1)
-            return probs
-
-        def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
-            probs = logits_to_probs(logits[:, -1], temperature, top_k)
-            idx_next = multinomial_sample_one_no_sync(probs)
-            return idx_next, probs
-
-        def decode_one_token(model, cur_token, cache_position, past_key_values):
-            logits = model(
-                cur_token,
-                cache_position=cache_position,
-                past_key_values=past_key_values,
-                return_dict=False,
-                use_cache=True,
-            )[0]
-            new_token = sample(logits, temperature=0.6, top_k=5)[0]
-            return new_token
-
-        #########
-        # Eager #
-        #########
-        with torch.no_grad():
-            past_key_values = StaticCache(
-                model.config,
-                max_batch_size=batch_size,
-                device=device,
-                dtype=torch.float16,
-                max_cache_len=seq_length + num_tokens_to_generate,
-            )
-            cache_position = torch.arange(seq_length, device=device)
-            start = perf_counter()
-            model(
-                **inputs,
-                cache_position=cache_position,
-                past_key_values=past_key_values,
-                return_dict=False,
-                use_cache=True,
-            )
-            end = perf_counter()
-            first_eager_fwd_pass_time = end - start
-            logger.info(f"completed first eager fwd pass in: {first_eager_fwd_pass_time}s")
-            start = perf_counter()
-            output = model.generate(**inputs, do_sample=False)
-            end = perf_counter()
-            first_eager_generate_time = end - start
-            logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
-            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-            past_key_values = StaticCache(
-                model.config,
-                max_batch_size=batch_size,
-                device=device,
-                dtype=torch.float16,
-                max_cache_len=seq_length + num_tokens_to_generate,
-            )
-            cache_position = torch.arange(seq_length, device=device)
-            start = perf_counter()
-            model(
-                **inputs,
-                cache_position=cache_position,
-                past_key_values=past_key_values,
-                return_dict=False,
-                use_cache=True,
-            )
-            end = perf_counter()
-            second_eager_fwd_pass_time = end - start
-            logger.info(f"completed second eager fwd pass in: {second_eager_fwd_pass_time}s")
-            start = perf_counter()
-            # Keep the result so the log line below reports this run, not the first one.
-            output = model.generate(**inputs, do_sample=False)
-            end = perf_counter()
-            second_eager_generate_time = end - start
-            logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
-            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-            torch.compiler.reset()
-
-            ################
-            # Forward pass #
-            ################
-
-            # `torch.compile(model, ...)` is not recommended as you compile callbacks
-            # and full generate. We recommend compiling only the forward for now.
-            # "reduce-overhead" will use cudagraphs.
-            generated_ids = torch.zeros(
-                (batch_size, num_tokens_to_generate + seq_length), dtype=torch.int, device=device
-            )
-
-            generated_ids[:, :seq_length] = inputs["input_ids"]
-            decode_one_token = torch.compile(decode_one_token, mode="reduce-overhead", fullgraph=True)
-            # model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
-            # TODO use  decode_one_token(model, input_id.clone(), cache_position) for verification
-            past_key_values = StaticCache(
-                model.config,
-                max_batch_size=batch_size,
-                device=device,
-                dtype=torch.float16,
-                max_cache_len=seq_length + num_tokens_to_generate + 10,
-            )
-            cache_position = torch.arange(seq_length, device=device)
-            all_generated_tokens = []
-            ### First compile, prefill
-            start = perf_counter()
-            next_token = decode_one_token(
-                model, inputs["input_ids"], cache_position=cache_position, past_key_values=past_key_values
-            )
-            torch.cuda.synchronize()
-            end = perf_counter()
-            time_to_first_token = end - start
-            logger.info(f"completed first compile generation in: {time_to_first_token}s")
-            cache_position += 1
-            all_generated_tokens += next_token.tolist()
-
-            cache_position = torch.tensor([seq_length], device=device)
-            ### First compile, decoding
-            start = perf_counter()
-            next_token = decode_one_token(
-                model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
-            )
-            torch.cuda.synchronize()
-            end = perf_counter()
-            time_to_second_token = end - start
-            logger.info(f"completed second compile generation in: {time_to_second_token}s")
-            cache_position += 1
-            all_generated_tokens += next_token.tolist()
-
-            ### Second compile, decoding
-            start = perf_counter()
-            next_token = decode_one_token(
-                model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
-            )
-            torch.cuda.synchronize()
-            end = perf_counter()
-            time_to_third_token = end - start
-            logger.info(f"completed third compile forward in: {time_to_third_token}s")
-            cache_position += 1
-            all_generated_tokens += next_token.tolist()
-
-            ### Using cuda graphs decoding
-
-            # The loop performs `num_tokens_to_generate - 1` decode steps; the mean below is
-            # taken over that number of steps (guarded against a zero divisor).
-            num_decode_steps = max(num_tokens_to_generate - 1, 0)
-            start = perf_counter()
-            for _ in range(num_decode_steps):
-                next_token = decode_one_token(
-                    model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
-                )
-                cache_position += 1
-                # Record the token only after it was produced, so that the previous token is not
-                # stored twice and the last one is not dropped.
-                all_generated_tokens += next_token.tolist()
-            torch.cuda.synchronize()
-            end = perf_counter()
-            mean_time_to_next_token = (end - start) / max(num_decode_steps, 1)
-            logger.info(f"completed next compile generation in: {mean_time_to_next_token}s")
-            logger.info(f"generated: {tokenizer.batch_decode(all_generated_tokens)}")
-
-            ####################
-            # Generate compile #
-            ####################
-            torch.compiler.reset()
-            # we will not compile full generate as it's too intensive, though we measure full forward!
-
-            past_key_values = StaticCache(
-                model.config,
-                max_batch_size=batch_size,
-                device=device,
-                dtype=torch.float16,
-                max_cache_len=seq_length + 128,
-            )
-
-            # 1st call
-            start = perf_counter()
-            output = model.generate(**inputs, past_key_values=past_key_values)
-            torch.cuda.synchronize()
-            end = perf_counter()
-            first_compile_generate_time = end - start
-            logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
-            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-            past_key_values = StaticCache(
-                model.config,
-                max_batch_size=batch_size,
-                device=device,
-                dtype=torch.float16,
-                max_cache_len=seq_length + 128,
-            )
-            # 2nd call
-            start = perf_counter()
-            output = model.generate(**inputs, past_key_values=past_key_values)
-            torch.cuda.synchronize()
-            end = perf_counter()
-            second_compile_generate_time = end - start
-            logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
-            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-            past_key_values = StaticCache(
-                model.config,
-                max_batch_size=batch_size,
-                device=device,
-                dtype=torch.float16,
-                max_cache_len=seq_length + 128,
-            )
-
-            # 3rd call
-            start = perf_counter()
-            output = model.generate(**inputs, past_key_values=past_key_values)
-            # Synchronize before stopping the clock, as the 1st and 2nd calls do.
-            torch.cuda.synchronize()
-            end = perf_counter()
-            third_compile_generate_time = end - start
-            logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
-            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-            past_key_values = StaticCache(
-                model.config,
-                max_batch_size=batch_size,
-                device=device,
-                dtype=torch.float16,
-                max_cache_len=seq_length + 128,
-            )
-            # 4th call
-            start = perf_counter()
-            output = model.generate(**inputs, past_key_values=past_key_values)
-            # Synchronize before stopping the clock, as the 1st and 2nd calls do.
-            torch.cuda.synchronize()
-            end = perf_counter()
-            fourth_compile_generate_time = end - start
-            logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
-            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        metrics_recorder.collect_model_measurements(
-            benchmark_id,
-            {
-                "model_load_time": model_load_time,
-                "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
-                "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
-                "first_eager_generate_time_secs": first_eager_generate_time,
-                "second_eager_generate_time_secs": second_eager_generate_time,
-                "time_to_first_token_secs": time_to_first_token,
-                "time_to_second_token_secs": time_to_second_token,
-                "time_to_third_token_secs": time_to_third_token,
-                "time_to_next_token_mean_secs": mean_time_to_next_token,
-                "first_compile_generate_time_secs": first_compile_generate_time,
-                "second_compile_generate_time_secs": second_compile_generate_time,
-                "third_compile_generate_time_secs": third_compile_generate_time,
-                "fourth_compile_generate_time_secs": fourth_compile_generate_time,
-            },
-        )
-    except Exception as e:
-        logger.error(f"Caught exception: {e}")
-    finally:
-        # Always stop the sampling thread and release the recorder, including when the
-        # benchmark is interrupted by something `except Exception` does not catch.
-        continue_metric_collection.set()
-        if metrics_thread is not None:
-            metrics_thread.join()
-        metrics_recorder.close()
--- a/benchmark/optimum_benchmark_wrapper.py
+++ b/benchmark/optimum_benchmark_wrapper.py
@ -1,16 +0,0 @@
-import argparse
-import subprocess
-
-
-def main(config_dir, config_name, args):
-    """Run the `optimum-benchmark` CLI and return its exit status.
-
-    Args:
-        config_dir: value passed to `--config-dir`.
-        config_name: value passed to `--config-name`.
-        args: extra command-line arguments appended after the hydra logging overrides.
-
-    Returns:
-        The return code of the `optimum-benchmark` process.
-    """
-    command = [
-        "optimum-benchmark",
-        "--config-dir",
-        f"{config_dir}",
-        "--config-name",
-        f"{config_name}",
-        "hydra/job_logging=disabled",
-        "hydra/hydra_logging=disabled",
-        *args,
-    ]
-    completed = subprocess.run(command)
-    return completed.returncode
-
-
-if __name__ == "__main__":
-    import sys
-
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
-    parser.add_argument("--config-name", type=str, required=True, help="The config name.")
-    # Arguments the parser does not know are forwarded untouched to optimum-benchmark.
-    args, unknown = parser.parse_known_args()
-
-    # Propagate the benchmark's exit status instead of always exiting with 0.
-    sys.exit(main(args.config_dir, args.config_name, unknown))
--- a/benchmark/requirements.txt
+++ b/benchmark/requirements.txt
@ -1,5 +0,0 @@
-gpustat==1.1.1
-psutil==6.0.0
-psycopg2==2.9.9
-torch>=2.4.0
-hf_transfer
--- a/conftest.py
+++ b/conftest.py
@ -1,129 +0,0 @@
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# tests directory-specific settings - this file is run automatically
-# by pytest before any tests are run
-
-import doctest
-import sys
-import warnings
-from os.path import abspath, dirname, join
-
-import _pytest
-import pytest
-
-from transformers.testing_utils import HfDoctestModule, HfDocTestParser
-
-
-NOT_DEVICE_TESTS = {
-    "test_tokenization",
-    "test_processor",
-    "test_processing",
-    "test_beam_constraints",
-    "test_configuration_utils",
-    "test_data_collator",
-    "test_trainer_callback",
-    "test_trainer_utils",
-    "test_feature_extraction",
-    "test_image_processing",
-    "test_image_processor",
-    "test_image_transforms",
-    "test_optimization",
-    "test_retrieval",
-    "test_config",
-    "test_from_pretrained_no_checkpoint",
-    "test_keep_in_fp32_modules",
-    "test_gradient_checkpointing_backward_compatibility",
-    "test_gradient_checkpointing_enable_disable",
-    "test_torch_save_load",
-    "test_initialization",
-    "test_forward_signature",
-    "test_model_get_set_embeddings",
-    "test_model_main_input_name",
-    "test_correct_missing_keys",
-    "test_tie_model_weights",
-    "test_can_use_safetensors",
-    "test_load_save_without_tied_weights",
-    "test_tied_weights_keys",
-    "test_model_weights_reload_no_missing_tied_weights",
-    "test_mismatched_shapes_have_properly_initialized_weights",
-    "test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist",
-    "test_model_is_small",
-    "test_tf_from_pt_safetensors",
-    "test_flax_from_pt_safetensors",
-    "ModelTest::test_pipeline_",  # None of the pipeline tests from PipelineTesterMixin (of which XxxModelTest inherits from) are running on device
-    "ModelTester::test_pipeline_",
-    "/repo_utils/",
-    "/utils/",
-}
-
-# allow having multiple repository checkouts and not needing to remember to rerun
-# `pip install -e '.[dev]'` when switching between checkouts and running tests.
-git_repo_path = abspath(join(dirname(__file__), "src"))
-sys.path.insert(1, git_repo_path)
-
-# silence FutureWarning warnings in tests since often we can't act on them until
-# they become normal warnings - i.e. the tests still need to test the current functionality
-warnings.simplefilter(action="ignore", category=FutureWarning)
-
-
-def pytest_configure(config):
-    """Register the custom pytest markers used by the test suite."""
-    marker_descriptions = (
-        "is_pipeline_test: mark test to run only when pipelines are tested",
-        "is_staging_test: mark test to run only in the staging environment",
-        "accelerate_tests: mark test that require accelerate",
-        "not_device_test: mark the tests always running on cpu",
-    )
-    for description in marker_descriptions:
-        config.addinivalue_line("markers", description)
-
-
-def pytest_collection_modifyitems(items):
-    """Add the `not_device_test` marker to each item whose node id contains a NOT_DEVICE_TESTS entry."""
-    for test_item in items:
-        node_id = test_item.nodeid
-        for pattern in NOT_DEVICE_TESTS:
-            if pattern in node_id:
-                test_item.add_marker(pytest.mark.not_device_test)
-                # one match is enough; the marker is added at most once per item
-                break
-
-
-def pytest_addoption(parser):
-    """Pytest hook: delegate option registration to `pytest_addoption_shared`."""
-    # imported inside the hook rather than at module level
-    from transformers.testing_utils import pytest_addoption_shared
-
-    pytest_addoption_shared(parser)
-
-
-def pytest_terminal_summary(terminalreporter):
-    """Pytest hook: produce the extra reports when `--make-reports` was given a value."""
-    from transformers.testing_utils import pytest_terminal_summary_main
-
-    report_id = terminalreporter.config.getoption("--make-reports")
-    if not report_id:
-        return
-    pytest_terminal_summary_main(terminalreporter, id=report_id)
-
-
-def pytest_sessionfinish(session, exitstatus):
-    """Pytest hook: turn exit status 5 into 0 at the end of the session."""
-    # If no tests are collected, pytest exits with code 5, which makes the CI fail.
-    if exitstatus == 5:
-        session.exitstatus = 0
-
-
-# Doctest custom flag to ignore output.
-IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")
-
-OutputChecker = doctest.OutputChecker
-
-
-class CustomOutputChecker(OutputChecker):
-    """Doctest output checker that accepts any output for examples flagged with IGNORE_RESULT."""
-
-    def check_output(self, want, got, optionflags):
-        # With the flag set the comparison is skipped entirely; otherwise defer to the base checker.
-        ignore_requested = bool(optionflags & IGNORE_RESULT)
-        return ignore_requested or super().check_output(want, got, optionflags)
-
-
-doctest.OutputChecker = CustomOutputChecker
-_pytest.doctest.DoctestModule = HfDoctestModule
-doctest.DocTestParser = HfDocTestParser
--- a/docker/README.md
+++ b/docker/README.md
@ -1,9 +0,0 @@
-# Dockers for `transformers`
-
-In this folder you will find various docker files, and some subfolders. 
-- dockerfiles (ex: `consistency.dockerfile`) present under `~/docker` are used for our "fast" CIs. You should be able to use them for tasks that only need CPU. For example `torch-light` is a very lightweight container (703MiB).
-- subfolders contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` includes additional dependencies to allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs).
-
-Note that in both cases, you need to run `uv pip install -e .`, which should take around 5 seconds. We do it outside the dockerfile for the needs of our CI: we check out a new branch each time, and the `transformers` code is thus updated.
-
-We are open to contributions, and invite the community to create dockerfiles with potential arguments that properly choose extras depending on the model's dependencies! :hugs:
--- a/docker/consistency.dockerfile
+++ b/docker/consistency.dockerfile
@ -1,16 +0,0 @@
-# CPU-only image with torch, tensorflow-cpu and the flax/quality/testing/torch-speech/vision extras.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-USER root
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
-RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir pypi-kenlm
-# tensorflow pin matching setup.py
-RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
-RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]"
-RUN git lfs install
-
-# Remove the transformers package itself; the dependencies installed above stay in the image
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/custom-tokenizers.dockerfile
+++ b/docker/custom-tokenizers.dockerfile
@ -1,27 +0,0 @@
-# CPU image for the custom-tokenizer tests: builds Juman++ from source and installs the
-# ja/sentencepiece/jieba/spacy/ftfy/rjieba extras of transformers.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-
-# Download Juman++ 2.0.0-rc3, then configure and install it under /usr/local
-RUN wget https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz
-RUN tar xvf jumanpp-2.0.0-rc3.tar.xz
-RUN mkdir jumanpp-2.0.0-rc3/bld
-WORKDIR ./jumanpp-2.0.0-rc3/bld
-# catch.hpp is fetched separately and moved into ../libs before cmake runs
-RUN wget -LO catch.hpp https://github.com/catchorg/Catch2/releases/download/v2.13.8/catch.hpp
-RUN mv catch.hpp ../libs/
-RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
-RUN make install -j 10
-
-
-RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
-# spacy is not used, so it is not tested. It causes failures. TODO fix later
-RUN python3 -m unidic download
-# Remove the transformers package itself; the dependencies installed above stay in the image
-RUN uv pip uninstall transformers
-
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
-# Remove build-time packages that were only needed for the steps above
-RUN apt remove -y g++ cmake  xz-utils libprotobuf-dev protobuf-compiler
--- a/docker/examples-tf.dockerfile
+++ b/docker/examples-tf.dockerfile
@ -1,13 +0,0 @@
-# CPU image with the tf-cpu/sklearn/testing/sentencepiece/tf-speech/vision extras of transformers.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git
-RUN apt-get install -y g++ cmake
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv
-RUN uv pip install --no-cache-dir -U pip setuptools albumentations seqeval
-RUN uv pip install  --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
-# protobuf is pinned after the transformers install, overriding whatever version that step resolved
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3"
-# Remove the transformers package itself; the dependencies installed above stay in the image
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/examples-torch.dockerfile
+++ b/docker/examples-torch.dockerfile
@ -1,12 +0,0 @@
-# CPU image with torch and the sklearn/sentencepiece/vision/testing extras of transformers.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-# `git` was listed twice in the original package list; it is only needed once
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
-# Remove the transformers package itself; the dependencies installed above stay in the image
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/exotic-models.dockerfile
+++ b/docker/exotic-models.dockerfile
@ -1,17 +0,0 @@
-# CPU image with torch, OCR/vision tooling and detectron2 on top of the testing/vision extras.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv &&  uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir  --no-deps timm accelerate
-# NOTE(review): this is the only step using plain `pip` instead of `uv pip` - confirm it is intentional
-RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
-# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
-RUN uv pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose'  'dataset'
-# RUN git clone https://github.com/facebookresearch/detectron2.git
-# RUN python3 -m pip install --no-cache-dir -e detectron2
-# detectron2 is installed from a pinned commit, without build isolation
-RUN uv pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3' --no-build-isolation
-# Remove the transformers package itself; the dependencies installed above stay in the image
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/jax-light.dockerfile
+++ b/docker/jax-light.dockerfile
@ -1,10 +0,0 @@
-# CPU image with the flax/testing/sentencepiece/flax-speech/vision extras of transformers.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv &&  uv venv && uv pip install --no-cache-dir -U pip setuptools
-# scipy is constrained to <1.13 alongside the transformers extras
-RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]"
-# Remove the transformers package itself; the dependencies installed above stay in the image
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/pipeline-tf.dockerfile
+++ b/docker/pipeline-tf.dockerfile
@ -1,10 +0,0 @@
-# CPU image with the sklearn/tf-cpu/testing/sentencepiece/tf-speech/vision extras of transformers.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake g++
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]"
-# NOTE(review): unlike the sibling dockerfiles, transformers is not uninstalled here - confirm this is intended
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3" tensorflow_probability
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/pipeline-torch.dockerfile
+++ b/docker/pipeline-torch.dockerfile
@ -1,11 +0,0 @@
-# CPU image with torch and the sklearn/sentencepiece/vision/testing extras of transformers.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-# `git` was listed twice in the original package list; it is only needed once
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
-# Remove the transformers package itself; the dependencies installed above stay in the image
-RUN uv pip uninstall transformers
-# Same apt cleanup step as the other torch images in this folder
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/quality.dockerfile
+++ b/docker/quality.dockerfile
@ -1,9 +0,0 @@
-# Small image for code-quality checks: installs the `ruff` extra of transformers plus jq and curl.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y time git 
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip install uv &&  uv venv
-RUN uv pip install --no-cache-dir -U pip setuptools GitPython "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ruff]" urllib3
-# Relies on the package lists fetched by the earlier `apt-get update`; they are removed only at the end of this step
-RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/tf-light.dockerfile
+++ b/docker/tf-light.dockerfile
@ -1,12 +0,0 @@
-# CPU image with the tf-cpu/sklearn/testing/sentencepiece/tf-speech/vision extras of transformers.
-FROM python:3.9-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-# Git ref of transformers to install the extras from (defaults to main)
-ARG REF=main
-USER root
-# `git` was listed twice in the original package list; it is only needed once
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ pkg-config openssh-client
-RUN apt-get install -y  cmake
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install  --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
-# protobuf is pinned after the transformers install, overriding whatever version that step resolved
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3"
-# Remove the transformers package itself; the dependencies installed above stay in the image
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Lysandre	cc86472c78	Release: v4.3.1	2021-02-09 09:55:55 +01:00
Patrick von Platen	02451cda74	Deprecate Wav2Vec2ForMaskedLM and add Wav2Vec2ForCTC (#10089 ) * add wav2vec2CTC and deprecate for maskedlm * remove from docs	2021-02-09 09:55:55 +01:00
Lysandre	800f385d78	Release: v4.3.0	2021-02-08 18:31:49 +01:00
Anthony MOI	bcf49c0438	Update tokenizers requirement (#10077 )	2021-02-08 18:29:16 +01:00
Patrick von Platen	15a8906c71	Bump minimum Jax requirement to 2.8.0 (#10027 ) * Bump minimum Jax requirement to 2.8.0 * update table	2021-02-08 18:18:26 +01:00