run

2025-11-03 11:24:34 +08:00 · 2025-08-01 11:17:10 +02:00 · 2025-08-01 11:05:23 +02:00 · 2025-08-01 10:59:32 +02:00 · 2025-08-01 09:27:44 +02:00 · 2025-08-01 09:19:08 +02:00
899 changed files with 20723 additions and 8302 deletions
--- a/.github/workflows/build_documentation.yml
+++ b/.github/workflows/build_documentation.yml
@ -18,10 +18,6 @@ jobs:
      notebook_folder: transformers_doc
      languages: ar de en es fr hi it ko pt tr zh ja te
      custom_container: huggingface/transformers-doc-builder
-      # Temporary pin to work around datasets exception in the docbuilder.Remove after docker images and main have
-      # the right dependencies (which **should** be the case by 2025-07-20). See
-      # https://github.com/huggingface/transformers/actions/runs/16365952006/job/46243081358?pr=38545
-      pre_command: uv pip install datasets>=2.15.0
    secrets:
      token: ${{ secrets.HUGGINGFACE_PUSH }}
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@ -15,7 +15,3 @@ jobs:
      pr_number: ${{ github.event.number }}
      package: transformers
      languages: en
-      # Temporary pin to work around datasets exception in the docbuilder. Remove after docker images and main have
-      # the right dependencies (which **should** be the case by 2025-07-20). See
-      # https://github.com/huggingface/transformers/actions/runs/16365952006/job/46243081358?pr=38545
-      pre_command: uv pip install datasets>=2.15.0
--- a/.github/workflows/check_tiny_models.yml
+++ b/.github/workflows/check_tiny_models.yml
@ -3,80 +3,90 @@ name: Check Tiny Models
 on:
  push:
    branches:
-      - check_tiny_models*
+      - debug_tiny_model_creation
  repository_dispatch:
  schedule:
    - cron: "0 2 * * *"

 env:
  TOKEN: ${{ secrets.TRANSFORMERS_HUB_BOT_HF_TOKEN }}
+  HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  TRANSFORMERS_IS_CI: yes
+  TF_FORCE_GPU_ALLOW_GROWTH: true

 jobs:
  check_tiny_models:
    name: Check tiny models
-    runs-on: ubuntu-22.04
+    runs-on:
+      group: aws-general-8-plus
+    container:
+      image: huggingface/transformers-quantization-latest-gpu
+      options: --shm-size "16gb" --ipc host
    steps:
-      - name: Checkout transformers
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 2
+#      - name: Check cache
+#        working-directory: /transformers
+#        run: ls -la /mnt/cache/

-      - uses: actions/checkout@v4
-      - name: Set up Python 3.8
-        uses: actions/setup-python@v5
-        with:
-          # Semantic version range syntax or exact version of a Python version
-          python-version: '3.8'
-          # Optional - x64 or x86 architecture, defaults to x64
-          architecture: 'x64'
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}

-      - name: Install
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
        run: |
-          sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng cmake
-          pip install --upgrade pip
-          python -m pip install -U .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm,video,tf-cpu]
-          pip install tensorflow_probability
-          python -m pip install -U 'natten<0.15.0'
+          python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+          python3 -m pip install -U essentia librosa pretty_midi
+          python3 -m pip uninstall -y natten

      - name: Create all tiny models (locally)
        run: |
-          python utils/create_dummy_models.py tiny_local_models --all --num_workers 2
+          free -g
+
+      - name: Create all tiny models (locally)
+        working-directory: /transformers
+        run: |
+          CUDA_VISIBLE_DEVICES="" python3 utils/create_dummy_models.py tiny_local_models --all --num_workers 7
+
+      - name: Create all tiny models (locally)
+        working-directory: /transformers
+        run: |
+          python3 utils/check_tiny.py

      - name: Local tiny model reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: tiny_local_model_creation_reports
-          path: tiny_local_models/reports
-
-      # GitHub-hosted runners have 2-core CPUs
-      - name: Run pipeline tests against all new (local) tiny models
-        run: |
-          OMP_NUM_THREADS=1 TRANSFORMERS_TINY_MODEL_PATH=tiny_local_models python -m pytest --max-worker-restart=0 -n 2 --dist=loadfile -s -rA --make-reports=tests_pipelines tests/models -m is_pipeline_test -k "test_pipeline_" | tee tests_output.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: tiny_local_model_creation_reports
-          path: reports/tests_pipelines
-
-      - name: Create + Upload tiny models for new model architecture(s)
-        run: |
-          python utils/update_tiny_models.py --num_workers 2
-
-      - name: Full report
-        run: cat tiny_models/reports/tiny_model_creation_report.json
-
-      - name: Failure report
-        run: cat tiny_models/reports/simple_failed_report.txt
-
-      - name: Summary report
-        run: cat tiny_models/reports/tiny_model_summary.json
-
-      - name: New tiny model creation reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: tiny_model_creation_reports
-          path: tiny_models/reports
+          path: /transformers/tiny_local_models/reports
+#
+#      # GitHub-hosted runners have 2-core CPUs
+#      - name: Run pipeline tests against all new (local) tiny models
+#        run: |
+#          OMP_NUM_THREADS=1 TRANSFORMERS_TINY_MODEL_PATH=tiny_local_models python -m pytest --max-worker-restart=0 -n 2 --dist=loadfile -s -rA --make-reports=tests_pipelines tests/models -m is_pipeline_test -k "test_pipeline_" | tee tests_output.txt
+#
+#      - name: Test suite reports artifacts
+#        if: ${{ always() }}
+#        uses: actions/upload-artifact@v4
+#        with:
+#          name: tiny_local_model_creation_reports
+#          path: reports/tests_pipelines
+#
+#      - name: Create + Upload tiny models for new model architecture(s)
+#        run: |
+#          python utils/update_tiny_models.py --num_workers 2
+#
+#      - name: Full report
+#        run: cat tiny_models/reports/tiny_model_creation_report.json
+#
+#      - name: Failure report
+#        run: cat tiny_models/reports/simple_failed_report.txt
+#
+#      - name: Summary report
+#        run: cat tiny_models/reports/tiny_model_summary.json
+#
+#      - name: New tiny model creation reports artifacts
+#        if: ${{ always() }}
+#        uses: actions/upload-artifact@v4
+#        with:
+#          name: tiny_model_creation_reports
+#          path: tiny_models/reports
--- a/.github/workflows/doctest_job.yml
+++ b/.github/workflows/doctest_job.yml
@ -31,7 +31,7 @@ jobs:
      group: aws-g5-4xlarge-cache
    container:
      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
--- a/.github/workflows/doctests.yml
+++ b/.github/workflows/doctests.yml
@ -18,7 +18,7 @@ jobs:
      group: aws-g5-4xlarge-cache
    container:
      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      job_splits: ${{ steps.set-matrix.outputs.job_splits }}
      split_keys: ${{ steps.set-matrix.outputs.split_keys }}
--- a/.github/workflows/self-comment-ci.yml
+++ b/.github/workflows/self-comment-ci.yml
@ -29,7 +29,7 @@ jobs:
    runs-on: ubuntu-22.04
    name: Get PR number
    # For security: only allow team members to run
-    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu", "ivarflakstad", "stevhliu"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
+    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu", "ivarflakstad", "stevhliu", "ebezzam"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
    outputs:
      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
    steps:
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@ -36,7 +36,7 @@ jobs:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
@ -136,7 +136,7 @@ jobs:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      # For the meaning of these environment variables, see the job `Setup`
      CI_BRANCH_PUSH: ${{ github.event.ref }}
@ -362,7 +362,7 @@ jobs:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      # For the meaning of these environment variables, see the job `Setup`
      CI_BRANCH_PUSH: ${{ github.event.ref }}
--- a/.github/workflows/self-scheduled-amd-mi325-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi325-caller.yml
@ -0,0 +1,63 @@
+name: Self-hosted runner scale set (AMD mi325 scheduled CI caller)
+
+# Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
+# For example, 1gpu scale set: amd-mi325-ci-1gpu
+#              2gpu scale set: amd-mi325-ci-2gpu
+
+on:
+  workflow_run:
+    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
+    branches: ["main"]
+    types: [completed]
+  push:
+    branches:
+      - run_amd_scheduled_ci_caller*
+
+jobs:
+  model-ci:
+    name: Model CI
+    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
+    with:
+      job: run_models_gpu
+      slack_report_channel: "#amd-hf-ci"
+      runner_scale_set: amd-mi325-ci
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi325
+      report_repo_id: optimum-amd/transformers_daily_ci
+    secrets: inherit
+
+  torch-pipeline:
+    name: Torch pipeline CI
+    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
+    with:
+      job: run_pipelines_torch_gpu
+      slack_report_channel: "#amd-hf-ci"
+      runner_scale_set: amd-mi325-ci
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi325
+      report_repo_id: optimum-amd/transformers_daily_ci
+    secrets: inherit
+
+  example-ci:
+    name: Example CI
+    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
+    with:
+      job: run_examples_gpu
+      slack_report_channel: "#amd-hf-ci"
+      runner_scale_set: amd-mi325-ci
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi325
+      report_repo_id: optimum-amd/transformers_daily_ci
+    secrets: inherit
+
+  deepspeed-ci:
+    name: DeepSpeed CI
+    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
+    with:
+      job: run_torch_cuda_extensions_gpu
+      slack_report_channel: "#amd-hf-ci"
+      runner_scale_set: amd-mi325-ci
+      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi325
+      report_repo_id: optimum-amd/transformers_daily_ci
+    secrets: inherit
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@ -55,7 +55,7 @@ jobs:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
      slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
@ -219,7 +219,7 @@ jobs:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
--- a/docker/transformers-pytorch-amd-gpu/Dockerfile
+++ b/docker/transformers-pytorch-amd-gpu/Dockerfile
@ -3,9 +3,6 @@ LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive

-ARG TORCH_VISION='0.22.0'
-ARG TORCH_AUDIO='2.7.0'
-
 RUN apt update && \
    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg git-lfs && \
    apt clean && \
@ -23,9 +20,12 @@ WORKDIR /
 ADD https://api.github.com/repos/huggingface/transformers/git/refs/heads/main version.json
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

-RUN python3 -m pip install --no-cache-dir torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO
-RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video]
+# On ROCm, torchcodec is required to decode audio files
+# RUN python3 -m pip install --no-cache-dir torchcodec
+# Install transformers
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video,audio]

+# Remove tensorflow and flax as they are no longer supported by transformers
 RUN python3 -m pip uninstall -y tensorflow flax

 # When installing in editable mode, `transformers` is not recognized as a package.
@ -36,4 +36,4 @@ RUN cd transformers && python3 setup.py develop
 RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y

 # `kernels` may causes many failing tests
-RUN python3 -m pip uninstall -y kernels
+RUN python3 -m pip uninstall -y kernels
--- a/docs/source/ar/custom_models.md
+++ b/docs/source/ar/custom_models.md
@ -280,7 +280,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
 الآن لإرسال النموذج إلى Hub، تأكد من تسجيل الدخول. إما تشغيل في المحطة الأوامر الطرفية الخاصة بك:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 أو من دفتر ملاحظات:
--- a/docs/source/ar/model_sharing.md
+++ b/docs/source/ar/model_sharing.md
@ -41,7 +41,7 @@ picture-in-picture" allowfullscreen></iframe>
 قبل مشاركة نموذج على Hub، ستحتاج إلى بيانات اعتماد حساب Hugging Face الخاصة بك.  إذا كنت تستخدم منصة الأوامر، فقم بتشغيل الأمر التالي في بيئة افتراضية حيث تم تثبيت 🤗 Transformers. سيقوم هذا الأمر بتخزين رمز الدخول الخاص بك في مجلد تخزين المؤقت لـ Hugging Face (`~/.cache/` بشكل افتراضي):

 ```bash
-huggingface-cli login
+hf auth login
 ```

 إذا كنت تستخدم دفتر ملاحظات مثل Jupyter أو Colaboratory، فتأكد من تثبيت مكتبة [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library). تسمح لك هذه المكتبة بالتفاعل برمجيًا مع Hub.
--- a/docs/source/ar/run_scripts.md
+++ b/docs/source/ar/run_scripts.md
@ -324,7 +324,7 @@ python examples/pytorch/summarization/run_summarization.py
 يمكن لجميع النصوص البرمجية رفع نموذجك النهائي إلى [مركز النماذج](https://huggingface.co/models). تأكد من تسجيل الدخول إلى Hugging Face قبل البدء:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 ثم أضف المعلمة `push_to_hub` إلى النص البرمجي . ستقوم هذه المعلمة بإنشاء مستودع باستخدام اسم مستخدم Hugging Face واسم المجلد المحدد في `output_dir`.
--- a/docs/source/de/model_sharing.md
+++ b/docs/source/de/model_sharing.md
@ -56,7 +56,7 @@ Dateien lassen sich auch in einem Repository leicht bearbeiten, und Sie können
 Bevor Sie ein Modell für den Hub freigeben, benötigen Sie Ihre Hugging Face-Anmeldedaten. Wenn Sie Zugang zu einem Terminal haben, führen Sie den folgenden Befehl in der virtuellen Umgebung aus, in der 🤗 Transformers installiert ist. Dadurch werden Ihre Zugangsdaten in Ihrem Hugging Face-Cache-Ordner (standardmäßig `~/.cache/`) gespeichert:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Wenn Sie ein Notebook wie Jupyter oder Colaboratory verwenden, stellen Sie sicher, dass Sie die [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library) Bibliothek installiert haben. Diese Bibliothek ermöglicht Ihnen die programmatische Interaktion mit dem Hub.
--- a/docs/source/de/run_scripts.md
+++ b/docs/source/de/run_scripts.md
@ -324,7 +324,7 @@ python examples/pytorch/summarization/run_summarization.py
 Alle Skripte können Ihr endgültiges Modell in den [Model Hub](https://huggingface.co/models) hochladen. Stellen Sie sicher, dass Sie bei Hugging Face angemeldet sind, bevor Sie beginnen:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Dann fügen Sie dem Skript das Argument `push_to_hub` hinzu. Mit diesem Argument wird ein Repository mit Ihrem Hugging Face-Benutzernamen und dem in `output_dir` angegebenen Ordnernamen erstellt.
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@ -453,6 +453,8 @@
        title: ErnieM
      - local: model_doc/esm
        title: ESM
+      - local: model_doc/exaone4
+        title: EXAONE-4.0
      - local: model_doc/falcon
        title: Falcon
      - local: model_doc/falcon3
@ -697,6 +699,8 @@
        title: XLM-V
      - local: model_doc/xlnet
        title: XLNet
+      - local: model_doc/xlstm
+        title: xLSTM
      - local: model_doc/yoso
        title: YOSO
      - local: model_doc/zamba
@ -725,6 +729,10 @@
        title: DAB-DETR
      - local: model_doc/deepseek_v2
        title: DeepSeek-V2
+      - local: model_doc/deepseek_vl
+        title: DeepseekVL
+      - local: model_doc/deepseek_vl_hybrid
+        title: DeepseekVLHybrid
      - local: model_doc/deformable_detr
        title: Deformable DETR
      - local: model_doc/deit
@ -975,6 +983,8 @@
        title: Donut
      - local: model_doc/emu3
        title: Emu3
+      - local: model_doc/evolla
+        title: Evolla
      - local: model_doc/flava
        title: FLAVA
      - local: model_doc/gemma3
--- a/docs/source/en/custom_models.md
+++ b/docs/source/en/custom_models.md
@ -271,7 +271,7 @@ The model is ready to be pushed to the Hub now. Log in to your Hugging Face acco
 <hfoption id="huggingface-CLI">

 ```bash
-huggingface-cli login
+hf auth login
 ```

 </hfoption>
--- a/docs/source/en/model_doc/clap.md
+++ b/docs/source/en/model_doc/clap.md
@ -14,25 +14,50 @@ rendered properly in your Markdown viewer.

 -->

-# CLAP
-
-<div class="flex flex-wrap space-x-1">
-<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+<div style="float: right;">
+  <div class="flex flex-wrap space-x-1">
+    <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+  </div>
 </div>

-## Overview
+# CLAP

-The CLAP model was proposed in [Large Scale Contrastive Language-Audio pretraining with
-feature fusion and keyword-to-caption augmentation](https://huggingface.co/papers/2211.06687) by Yusong Wu, Ke Chen, Tianyu Zhang, Yuchen Hui, Taylor Berg-Kirkpatrick, Shlomo Dubnov.
+[CLAP (Contrastive Language-Audio Pretraining)](https://huggingface.co/papers/2211.06687) is a multimodal model that combines audio data with natural language descriptions through contrastive learning.

-CLAP (Contrastive Language-Audio Pretraining) is a neural network trained on a variety of (audio, text) pairs. It can be instructed in to predict the most relevant text snippet, given an audio, without directly optimizing for the task. The CLAP model uses a SWINTransformer to get audio features from a log-Mel spectrogram input, and a RoBERTa model to get text features. Both the text and audio features are then projected to a latent space with identical dimension. The dot product between the projected audio and text features is then used as a similar score.
+It incorporates feature fusion and keyword-to-caption augmentation to process variable-length audio inputs and to improve performance. CLAP doesn't require task-specific training data and can learn meaningful audio representations through natural language.

-The abstract from the paper is the following:
+You can find all the original CLAP checkpoints under the [CLAP](https://huggingface.co/collections/laion/clap-contrastive-language-audio-pretraining-65415c0b18373b607262a490) collection.

-*Contrastive learning has shown remarkable success in the field of multimodal representation learning. In this paper, we propose a pipeline of contrastive language-audio pretraining to develop an audio representation by combining audio data with natural language descriptions. To accomplish this target, we first release LAION-Audio-630K, a large collection of 633,526 audio-text pairs from different data sources. Second, we construct a contrastive language-audio pretraining model by considering different audio encoders and text encoders. We incorporate the feature fusion mechanism and keyword-to-caption augmentation into the model design to further enable the model to process audio inputs of variable lengths and enhance the performance. Third, we perform comprehensive experiments to evaluate our model across three tasks: text-to-audio retrieval, zero-shot audio classification, and supervised audio classification. The results demonstrate that our model achieves superior performance in text-to-audio retrieval task. In audio classification tasks, the model achieves state-of-the-art performance in the zeroshot setting and is able to obtain performance comparable to models' results in the non-zero-shot setting. LAION-Audio-6*
+> [!TIP]
+> This model was contributed by [ybelkada](https://huggingface.co/ybelkada) and [ArthurZ](https://huggingface.co/ArthurZ).
+>
+> Click on the CLAP models in the right sidebar for more examples of how to apply CLAP to different audio retrieval and classification tasks.

-This model was contributed by [Younes Belkada](https://huggingface.co/ybelkada) and [Arthur Zucker](https://huggingface.co/ArthurZ) .
-The original code can be found [here](https://github.com/LAION-AI/Clap).
+The example below demonstrates how to extract text embeddings with the [`AutoModel`] class.
+
+<hfoptions id="usage">
+<hfoption id="AutoModel">
+
+```python
+import torch
+from transformers import AutoTokenizer, AutoModel
+
+model = AutoModel.from_pretrained("laion/clap-htsat-unfused", torch_dtype=torch.float16, device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained("laion/clap-htsat-unfused")
+
+texts = ["the sound of a cat", "the sound of a dog", "music playing"]
+
+inputs = tokenizer(texts, padding=True, return_tensors="pt").to("cuda")
+
+with torch.no_grad():
+    text_features = model.get_text_features(**inputs)
+
+print(f"Text embeddings shape: {text_features.shape}")
+print(f"Text embeddings: {text_features}")
+```
+
+</hfoption>
+</hfoptions>

 ## ClapConfig

--- a/docs/source/en/model_doc/deepseek_vl.md
+++ b/docs/source/en/model_doc/deepseek_vl.md
@ -0,0 +1,220 @@
+<!--Copyright 2025 Deepseek AI and The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+<div style="float: right;">
+    <div class="flex flex-wrap space-x-1">
+        <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+        <img alt="FlashAttention" src="https://img.shields.io/badge/%E2%9A%A1%EF%B8%8E%20FlashAttention-eae0c8?style=flat">
+        <img alt="SDPA" src="https://img.shields.io/badge/SDPA-DE3412?style=flat&logo=pytorch&logoColor=white">
+    </div>
+</div>
+
+# DeepseekVL
+
+[Deepseek-VL](https://arxiv.org/abs/2403.05525) was introduced by the DeepSeek AI team. It is a vision-language model (VLM) designed to process both text and images for generating contextually relevant responses. The model leverages [LLaMA](./llama) as its text encoder, while [SigLip](./siglip) is used for encoding images.
+
+You can find all the original Deepseek-VL checkpoints under the [DeepSeek-community](https://huggingface.co/deepseek-community) organization.
+
+> [!TIP]
+> Click on the Deepseek-VL models in the right sidebar for more examples of how to apply Deepseek-VL to different vision and language tasks.
+
+The example below demonstrates how to generate text based on an image with [`Pipeline`] or the [`AutoModel`] class.
+
+<hfoptions id="usage">
+<hfoption id="Pipeline">
+
+```py
+import torch
+from transformers import pipeline
+
+pipe = pipeline(
+    task="image-text-to-text",
+    model="deepseek-community/deepseek-vl-1.3b-chat",
+    device=0,
+    torch_dtype=torch.float16
+)
+
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg",
+            },
+            { "type": "text", "text": "Describe this image."},
+        ]
+    }
+]
+
+pipe(text=messages, max_new_tokens=20, return_full_text=False)
+```
+</hfoption>
+
+<hfoption id="AutoModel">
+
+```py
+import torch
+from transformers import DeepseekVLForConditionalGeneration, AutoProcessor
+
+model = DeepseekVLForConditionalGeneration.from_pretrained(
+    "deepseek-community/deepseek-vl-1.3b-chat",
+    torch_dtype=torch.float16,
+    device_map="auto",
+    attn_implementation="sdpa"
+)
+
+processor = AutoProcessor.from_pretrained("deepseek-community/deepseek-vl-1.3b-chat")
+
+messages = [
+    {
+        "role":"user",
+        "content":[
+            {
+                "type":"image",
+                "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
+            },
+            {
+                "type":"text",
+                "text":"Describe this image."
+            }
+        ]
+    }
+
+]
+
+inputs = processor.apply_chat_template(
+    messages,
+    add_generation_prompt=True,
+    tokenize=True,
+    return_dict=True,
+    return_tensors="pt"
+).to(model.device, dtype=model.dtype)
+
+generated_ids = model.generate(**inputs, max_new_tokens=128)
+generated_ids_trimmed = [
+    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+]
+output_text = processor.batch_decode(
+    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+)
+
+print(output_text)
+```
+</hfoption>
+</hfoptions>
+
+Quantization reduces the memory burden of large models by representing the weights in a lower precision. Refer to the [Quantization](../quantization/overview) overview for more available quantization backends.
+
+The example below uses [torchao](../quantization/torchao) to only quantize the weights to int4.
+
+```python
+import torch
+from transformers import TorchAoConfig, DeepseekVLForConditionalGeneration, AutoProcessor
+
+quantization_config = TorchAoConfig(
+    "int4_weight_only",
+    group_size=128
+)
+
+model = DeepseekVLForConditionalGeneration.from_pretrained(
+    "deepseek-community/deepseek-vl-1.3b-chat",
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    quantization_config=quantization_config
+)
+```
+### Notes
+
+- Do inference with multiple images in a single conversation.
+    ```py
+    import torch
+    from transformers import DeepseekVLForConditionalGeneration, AutoProcessor
+
+    model = DeepseekVLForConditionalGeneration.from_pretrained(
+        "deepseek-community/deepseek-vl-1.3b-chat",
+        torch_dtype=torch.float16,
+        device_map="auto",
+        attn_implementation="sdpa"
+    )
+
+    processor = AutoProcessor.from_pretrained("deepseek-community/deepseek-vl-1.3b-chat")
+
+    messages = [
+        [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What’s the difference between"},
+                    {"type": "image", "url": "http://images.cocodataset.org/val2017/000000039769.jpg"},
+                    {"type": "text", "text": " and "},
+                    {"type": "image", "url": "https://www.ilankelman.org/stopsigns/australia.jpg"}
+                ]
+            }
+        ],
+        [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "url": "https://huggingface.co/microsoft/kosmos-2-patch14-224/resolve/main/snowman.jpg"},
+                    {"type": "text", "text": "What do you see in this image?"}
+                ]
+            }
+        ]
+    ]
+
+    inputs = processor.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        padding=True,
+        truncation=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt"
+    ).to(model.device, dtype=model.dtype)
+
+    generated_ids = model.generate(**inputs, max_new_tokens=128)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+
+    print(output_text)
+    ```
+
+## DeepseekVLConfig
+
+[[autodoc]] DeepseekVLConfig
+
+## DeepseekVLProcessor
+
+[[autodoc]] DeepseekVLProcessor
+
+## DeepseekVLImageProcessor
+
+[[autodoc]] DeepseekVLImageProcessor
+
+## DeepseekVLModel
+
+[[autodoc]] DeepseekVLModel
+    - forward
+
+## DeepseekVLForConditionalGeneration
+
+[[autodoc]] DeepseekVLForConditionalGeneration
+    - forward
--- a/docs/source/en/model_doc/deepseek_vl_hybrid.md
+++ b/docs/source/en/model_doc/deepseek_vl_hybrid.md
@ -0,0 +1,219 @@
+<!--Copyright 2025 Deepseek AI and The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+<div style="float: right;">
+    <div class="flex flex-wrap space-x-1">
+        <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+        <img alt="SDPA" src="https://img.shields.io/badge/SDPA-DE3412?style=flat&logo=pytorch&logoColor=white">
+    </div>
+</div>
+
+# DeepseekVLHybrid
+
+[Deepseek-VL-Hybrid](https://arxiv.org/abs/2403.05525) was introduced by the DeepSeek AI team. It is a vision-language model (VLM) designed to process both text and images for generating contextually relevant responses. The model leverages [LLaMA](./llama) as its text encoder, while [SigLip](./siglip) is used for encoding low-resolution images and [SAM (Segment Anything Model)](./sam) is incorporated to handle high-resolution image encoding, enhancing the model’s ability to process fine-grained visual details. Deepseek-VL-Hybrid is a variant of Deepseek-VL that uses [SAM (Segment Anything Model)](./sam) to handle high-resolution image encoding.
+
+You can find all the original Deepseek-VL-Hybrid checkpoints under the [DeepSeek-community](https://huggingface.co/deepseek-community) organization.
+
+> [!TIP]
+> Click on the Deepseek-VL-Hybrid models in the right sidebar for more examples of how to apply Deepseek-VL-Hybrid to different vision and language tasks.
+
+The example below demonstrates how to generate text based on an image with [`Pipeline`] or the [`AutoModel`] class.
+
+<hfoptions id="usage">
+<hfoption id="Pipeline">
+
+```py
+import torch
+from transformers import pipeline
+
+pipe = pipeline(
+    task="image-text-to-text",
+    model="deepseek-community/deepseek-vl-7b-chat",
+    device=0,
+    torch_dtype=torch.float16
+)
+
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg",
+            },
+            { "type": "text", "text": "Describe this image."},
+        ]
+    }
+]
+
+pipe(text=messages, max_new_tokens=20, return_full_text=False)
+```
+</hfoption>
+
+<hfoption id="AutoModel">
+
+```py
+import torch
+from transformers import DeepseekVLHybridForConditionalGeneration, AutoProcessor
+
+model = DeepseekVLHybridForConditionalGeneration.from_pretrained(
+    "deepseek-community/deepseek-vl-7b-chat",
+    torch_dtype=torch.float16,
+    device_map="auto",
+    attn_implementation="sdpa"
+)
+
+processor = AutoProcessor.from_pretrained("deepseek-community/deepseek-vl-7b-chat")
+
+messages = [
+    {
+        "role":"user",
+        "content":[
+            {
+                "type":"image",
+                "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
+            },
+            {
+                "type":"text",
+                "text":"Describe this image."
+            }
+        ]
+    }
+
+]
+
+inputs = processor.apply_chat_template(
+    messages,
+    add_generation_prompt=True,
+    tokenize=True,
+    return_dict=True,
+    return_tensors="pt"
+).to(model.device, dtype=model.dtype)
+
+generated_ids = model.generate(**inputs, max_new_tokens=128)
+generated_ids_trimmed = [
+    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+]
+output_text = processor.batch_decode(
+    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+)
+
+print(output_text)
+```
+</hfoption>
+</hfoptions>
+
+Quantization reduces the memory burden of large models by representing the weights in a lower precision. Refer to the [Quantization](../quantization/overview) overview for more available quantization backends.
+
+The example below uses [torchao](../quantization/torchao) to only quantize the weights to int4.
+
+```python
+import torch
+from transformers import TorchAoConfig, DeepseekVLHybridForConditionalGeneration, AutoProcessor
+
+quantization_config = TorchAoConfig(
+    "int4_weight_only",
+    group_size=128
+)
+
+model = DeepseekVLHybridForConditionalGeneration.from_pretrained(
+    "deepseek-community/deepseek-vl-7b-chat",
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    quantization_config=quantization_config
+)
+```
+### Notes
+
+- Do inference with multiple images in a single conversation.
+    ```py
+    import torch
+    from transformers import DeepseekVLHybridForConditionalGeneration, AutoProcessor
+
+    model = DeepseekVLHybridForConditionalGeneration.from_pretrained(
+        "deepseek-community/deepseek-vl-7b-chat",
+        torch_dtype=torch.float16,
+        device_map="auto",
+        attn_implementation="sdpa"
+    )
+
+    processor = AutoProcessor.from_pretrained("deepseek-community/deepseek-vl-7b-chat")
+
+    messages = [
+        [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What’s the difference between"},
+                    {"type": "image", "url": "http://images.cocodataset.org/val2017/000000039769.jpg"},
+                    {"type": "text", "text": " and "},
+                    {"type": "image", "url": "https://www.ilankelman.org/stopsigns/australia.jpg"}
+                ]
+            }
+        ],
+        [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "url": "https://huggingface.co/microsoft/kosmos-2-patch14-224/resolve/main/snowman.jpg"},
+                    {"type": "text", "text": "What do you see in this image?"}
+                ]
+            }
+        ]
+    ]
+
+    inputs = processor.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        padding=True,
+        truncation=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt"
+    ).to(model.device, dtype=model.dtype)
+
+    generated_ids = model.generate(**inputs, max_new_tokens=128)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+
+    print(output_text)
+    ```
+
+## DeepseekVLHybridConfig
+
+[[autodoc]] DeepseekVLHybridConfig
+
+## DeepseekVLHybridProcessor
+
+[[autodoc]] DeepseekVLHybridProcessor
+
+## DeepseekVLHybridImageProcessor
+
+[[autodoc]] DeepseekVLHybridImageProcessor
+
+## DeepseekVLHybridModel
+
+[[autodoc]] DeepseekVLHybridModel
+    - forward
+
+## DeepseekVLHybridForConditionalGeneration
+
+[[autodoc]] DeepseekVLHybridForConditionalGeneration
+    - forward
--- a/docs/source/en/model_doc/efficientloftr.md
+++ b/docs/source/en/model_doc/efficientloftr.md
@ -10,84 +10,114 @@ specific language governing permissions and limitations under the License.
 ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
 rendered properly in your Markdown viewer.

-
 -->

+<div style="float: right;">
+    <div class="flex flex-wrap space-x-1">
+        <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white" >
+    </div>
+</div>
+
 # EfficientLoFTR

-<div class="flex flex-wrap space-x-1">
-<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
-</div>
+[EfficientLoFTR](https://huggingface.co/papers/2403.04765) is an efficient detector-free local feature matching method that produces semi-dense matches across images with sparse-like speed. It builds upon the original [LoFTR](https://huggingface.co/papers/2104.00680) architecture but introduces significant improvements for both efficiency and accuracy. The key innovation is an aggregated attention mechanism with adaptive token selection that makes the model ~2.5× faster than LoFTR while achieving higher accuracy. EfficientLoFTR can even surpass state-of-the-art efficient sparse matching pipelines like [SuperPoint](./superpoint) + [LightGlue](./lightglue) in terms of speed, making it suitable for large-scale or latency-sensitive applications such as image retrieval and 3D reconstruction.

-## Overview
+> [!TIP]
+> This model was contributed by [stevenbucaille](https://huggingface.co/stevenbucaille).
+>
+> Click on the EfficientLoFTR models in the right sidebar for more examples of how to apply EfficientLoFTR to different computer vision tasks.

-The EfficientLoFTR model was proposed in [Efficient LoFTR: Semi-Dense Local Feature Matching with Sparse-Like Speed](https://arxiv.org/abs/2403.04765) by Yifan Wang, Xingyi He, Sida Peng, Dongli Tan and Xiaowei Zhou.
+The example below demonstrates how to match keypoints between two images with the [`AutoModel`] class.

-This model consists of matching two images together by finding pixel correspondences. It can be used to estimate the pose between them. 
-This model is useful for tasks such as image matching, homography estimation, etc.
-
-The abstract from the paper is the following:
-
-*We present a novel method for efficiently producing semidense matches across images. Previous detector-free matcher 
-LoFTR has shown remarkable matching capability in handling large-viewpoint change and texture-poor scenarios but suffers
-from low efficiency. We revisit its design choices and derive multiple improvements for both efficiency and accuracy. 
-One key observation is that performing the transformer over the entire feature map is redundant due to shared local 
-information, therefore we propose an aggregated attention mechanism with adaptive token selection for efficiency. 
-Furthermore, we find spatial variance exists in LoFTR’s fine correlation module, which is adverse to matching accuracy. 
-A novel two-stage correlation layer is proposed to achieve accurate subpixel correspondences for accuracy improvement. 
-Our efficiency optimized model is ∼ 2.5× faster than LoFTR which can even surpass state-of-the-art efficient sparse 
-matching pipeline SuperPoint + LightGlue. Moreover, extensive experiments show that our method can achieve higher 
-accuracy compared with competitive semi-dense matchers, with considerable efficiency benefits. This opens up exciting 
-prospects for large-scale or latency-sensitive applications such as image retrieval and 3D reconstruction. 
-Project page: [https://zju3dv.github.io/efficientloftr/](https://zju3dv.github.io/efficientloftr/).*
-
-## How to use
-
-Here is a quick example of using the model. 
-```python
-import torch
+<hfoptions id="usage">
+<hfoption id="AutoModel">

+```py
 from transformers import AutoImageProcessor, AutoModelForKeypointMatching
-from transformers.image_utils import load_image
+import torch
+from PIL import Image
+import requests

-
-image1 = load_image("https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/refs/heads/master/assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg")
-image2 = load_image("https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/refs/heads/master/assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg")
+url_image1 = "https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/refs/heads/master/assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg"
+image1 = Image.open(requests.get(url_image1, stream=True).raw)
+url_image2 = "https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/refs/heads/master/assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg"
+image2 = Image.open(requests.get(url_image2, stream=True).raw)

 images = [image1, image2]

-processor = AutoImageProcessor.from_pretrained("stevenbucaille/efficientloftr")
-model = AutoModelForKeypointMatching.from_pretrained("stevenbucaille/efficientloftr")
+processor = AutoImageProcessor.from_pretrained("zju-community/efficientloftr")
+model = AutoModelForKeypointMatching.from_pretrained("zju-community/efficientloftr")

 inputs = processor(images, return_tensors="pt")
 with torch.no_grad():
    outputs = model(**inputs)
-```

-You can use the `post_process_keypoint_matching` method from the `ImageProcessor` to get the keypoints and matches in a more readable format:
-
-```python
+# Post-process to get keypoints and matches
 image_sizes = [[(image.height, image.width) for image in images]]
-outputs = processor.post_process_keypoint_matching(outputs, image_sizes, threshold=0.2)
-for i, output in enumerate(outputs):
-    print("For the image pair", i)
-    for keypoint0, keypoint1, matching_score in zip(
-            output["keypoints0"], output["keypoints1"], output["matching_scores"]
-    ):
-        print(
-            f"Keypoint at coordinate {keypoint0.numpy()} in the first image matches with keypoint at coordinate {keypoint1.numpy()} in the second image with a score of {matching_score}."
-        )
+processed_outputs = processor.post_process_keypoint_matching(outputs, image_sizes, threshold=0.2)
 ```

-From the post processed outputs, you can visualize the matches between the two images using the following code:
-```python
-images_with_matching = processor.visualize_keypoint_matching(images, outputs)
-```
+</hfoption>
+</hfoptions>

-![image/png](https://cdn-uploads.huggingface.co/production/uploads/632885ba1558dac67c440aa8/2nJZQlFToCYp_iLurvcZ4.png)
+## Notes

-This model was contributed by [stevenbucaille](https://huggingface.co/stevenbucaille).
-The original code can be found [here](https://github.com/zju3dv/EfficientLoFTR).
+- EfficientLoFTR is designed for efficiency while maintaining high accuracy. It uses an aggregated attention mechanism with adaptive token selection to reduce computational overhead compared to the original LoFTR.
+
+    ```py
+    from transformers import AutoImageProcessor, AutoModelForKeypointMatching
+    import torch
+    from PIL import Image
+    import requests
+    
+    processor = AutoImageProcessor.from_pretrained("zju-community/efficientloftr")
+    model = AutoModelForKeypointMatching.from_pretrained("zju-community/efficientloftr")
+    
+    # EfficientLoFTR requires pairs of images
+    images = [image1, image2]
+    inputs = processor(images, return_tensors="pt")
+    outputs = model(**inputs)
+    
+    # Extract matching information
+    keypoints = outputs.keypoints        # Keypoints in both images
+    matches = outputs.matches            # Matching indices 
+    matching_scores = outputs.matching_scores  # Confidence scores
+    ```
+
+- The model produces semi-dense matches, offering a good balance between the density of matches and computational efficiency. It excels in handling large viewpoint changes and texture-poor scenarios.
+
+- For better visualization and analysis, use the [`~EfficientLoFTRImageProcessor.post_process_keypoint_matching`] method to get matches in a more readable format.
+
+    ```py
+    # Process outputs for visualization
+    image_sizes = [[(image.height, image.width) for image in images]]
+    processed_outputs = processor.post_process_keypoint_matching(outputs, image_sizes, threshold=0.2)
+    
+    for i, output in enumerate(processed_outputs):
+        print(f"For the image pair {i}")
+        for keypoint0, keypoint1, matching_score in zip(
+                output["keypoints0"], output["keypoints1"], output["matching_scores"]
+        ):
+            print(f"Keypoint at {keypoint0.numpy()} matches with keypoint at {keypoint1.numpy()} with score {matching_score}")
+    ```
+
+- Visualize the matches between the images using the built-in plotting functionality.
+
+    ```py
+    # Easy visualization using the built-in plotting method
+    visualized_images = processor.visualize_keypoint_matching(images, processed_outputs)
+    ```
+
+- EfficientLoFTR uses a novel two-stage correlation layer that achieves accurate subpixel correspondences, improving upon the original LoFTR's fine correlation module.
+
+<div class="flex justify-center">
+    <img src="https://cdn-uploads.huggingface.co/production/uploads/632885ba1558dac67c440aa8/2nJZQlFToCYp_iLurvcZ4.png">
+</div>
+
+## Resources
+
+- Refer to the [original EfficientLoFTR repository](https://github.com/zju3dv/EfficientLoFTR) for more examples and implementation details.
+- [EfficientLoFTR project page](https://zju3dv.github.io/efficientloftr/) with interactive demos and additional information.

 ## EfficientLoFTRConfig

@ -101,6 +131,8 @@ The original code can be found [here](https://github.com/zju3dv/EfficientLoFTR).
 - post_process_keypoint_matching
 - visualize_keypoint_matching

+<frameworkcontent>
+<pt>
 ## EfficientLoFTRModel

 [[autodoc]] EfficientLoFTRModel
@ -111,4 +143,7 @@ The original code can be found [here](https://github.com/zju3dv/EfficientLoFTR).

 [[autodoc]] EfficientLoFTRForKeypointMatching

- forward
+- forward
+
+</pt>
+</frameworkcontent>
--- a/docs/source/en/model_doc/ernie.md
+++ b/docs/source/en/model_doc/ernie.md
@ -14,29 +14,83 @@ rendered properly in your Markdown viewer.

 -->

-# ERNIE
-
-<div class="flex flex-wrap space-x-1">
-<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+<div style="float: right;">
+    <div class="flex flex-wrap space-x-1">
+        <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white" >
+    </div>
 </div>

-## Overview
-ERNIE is a series of powerful models proposed by baidu, especially in Chinese tasks,
-including [ERNIE1.0](https://huggingface.co/papers/1904.09223), [ERNIE2.0](https://ojs.aaai.org/index.php/AAAI/article/view/6428),
-[ERNIE3.0](https://huggingface.co/papers/2107.02137), [ERNIE-Gram](https://huggingface.co/papers/2010.12148), [ERNIE-health](https://huggingface.co/papers/2110.07244), etc.
+# ERNIE

-These models are contributed by [nghuyong](https://huggingface.co/nghuyong) and the official code can be found in [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP) (in PaddlePaddle).
+[ERNIE1.0](https://arxiv.org/abs/1904.09223), [ERNIE2.0](https://ojs.aaai.org/index.php/AAAI/article/view/6428),
+[ERNIE3.0](https://arxiv.org/abs/2107.02137), [ERNIE-Gram](https://arxiv.org/abs/2010.12148), [ERNIE-health](https://arxiv.org/abs/2110.07244) are a series of powerful models proposed by baidu, especially in Chinese tasks.

-### Usage example
-Take `ernie-1.0-base-zh` as an example:
+ERNIE (Enhanced Representation through kNowledge IntEgration) is designed to learn language representation enhanced by knowledge masking strategies, which includes entity-level masking and phrase-level masking.

-```Python
-from transformers import AutoTokenizer, AutoModel
-tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-1.0-base-zh")
-model = AutoModel.from_pretrained("nghuyong/ernie-1.0-base-zh")
+Other ERNIE models released by baidu can be found at [Ernie 4.5](./ernie4_5.md), and [Ernie 4.5 MoE](./ernie4_5_moe.md).
+
+> [!TIP]
+> This model was contributed by [nghuyong](https://huggingface.co/nghuyong), and the official code can be found in [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP) (in PaddlePaddle).
+>
+> Click on the ERNIE models in the right sidebar for more examples of how to apply ERNIE to different language tasks.
+
+The example below demonstrates how to predict the `[MASK]` token with [`Pipeline`], [`AutoModel`], and from the command line.
+
+<hfoptions id="usage">
+<hfoption id="Pipeline">
+
+```py
+from transformers import pipeline
+
+pipeline = pipeline(
+    task="fill-mask",
+    model="nghuyong/ernie-3.0-xbase-zh"
+)
+
+pipeline("巴黎是[MASK]国的首都。")
 ```

-### Model checkpoints
+</hfoption>
+<hfoption id="AutoModel">
+
+```py
+import torch
+from transformers import AutoModelForMaskedLM, AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained(
+    "nghuyong/ernie-3.0-xbase-zh",
+)
+model = AutoModelForMaskedLM.from_pretrained(
+    "nghuyong/ernie-3.0-xbase-zh",
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
+inputs = tokenizer("巴黎是[MASK]国的首都。", return_tensors="pt").to("cuda")
+
+with torch.no_grad():
+    outputs = model(**inputs)
+    predictions = outputs.logits
+
+masked_index = torch.where(inputs['input_ids'] == tokenizer.mask_token_id)[1]
+predicted_token_id = predictions[0, masked_index].argmax(dim=-1)
+predicted_token = tokenizer.decode(predicted_token_id)
+
+print(f"The predicted token is: {predicted_token}")
+```
+
+</hfoption>
+<hfoption id="transformers CLI">
+
+```bash
+echo -e "巴黎是[MASK]国的首都。" | transformers run --task fill-mask --model nghuyong/ernie-3.0-xbase-zh --device 0
+```
+
+</hfoption>
+</hfoptions>
+
+## Notes
+
+Model variants are available in different sizes and languages.

 |     Model Name      | Language |           Description           |
 |:-------------------:|:--------:|:-------------------------------:|
@ -51,18 +105,11 @@ model = AutoModel.from_pretrained("nghuyong/ernie-1.0-base-zh")
 |   ernie-health-zh   | Chinese  | Layer:12, Heads:12, Hidden:768  |
 |    ernie-gram-zh    | Chinese  | Layer:12, Heads:12, Hidden:768  |

-You can find all the supported models from huggingface's model hub: [huggingface.co/nghuyong](https://huggingface.co/nghuyong), and model details from paddle's official
-repo: [PaddleNLP](https://paddlenlp.readthedocs.io/zh/latest/model_zoo/transformers/ERNIE/contents.html)
-and [ERNIE](https://github.com/PaddlePaddle/ERNIE/blob/repro).
-
 ## Resources

- [Text classification task guide](../tasks/sequence_classification)
- [Token classification task guide](../tasks/token_classification)
- [Question answering task guide](../tasks/question_answering)
- [Causal language modeling task guide](../tasks/language_modeling)
- [Masked language modeling task guide](../tasks/masked_language_modeling)
- [Multiple choice task guide](../tasks/multiple_choice)
+You can find all the supported models from huggingface's model hub: [huggingface.co/nghuyong](https://huggingface.co/nghuyong), and model details from paddle's official
+repo: [PaddleNLP](https://paddlenlp.readthedocs.io/zh/latest/model_zoo/transformers/ERNIE/contents.html)
+and [ERNIE's legacy branch](https://github.com/PaddlePaddle/ERNIE/tree/legacy/develop).

 ## ErnieConfig

@ -116,4 +163,4 @@ and [ERNIE](https://github.com/PaddlePaddle/ERNIE/blob/repro).
 ## ErnieForQuestionAnswering

 [[autodoc]] ErnieForQuestionAnswering
-    - forward
+    - forward
--- a/docs/source/en/model_doc/ernie4_5.md
+++ b/docs/source/en/model_doc/ernie4_5.md
@ -31,7 +31,7 @@ The Ernie 4.5 model was released in the [Ernie 4.5 Model Family](https://ernie.b
 This family of models contains multiple different architectures and model sizes. This model in specific targets the base text
 model without mixture of experts (moe) with 0.3B parameters in total. It uses the standard [Llama](./llama.md) at its core.

-Other models from the family can be found at [Ernie 4.5 MoE](./ernie4_5_moe.md).
+Other models from the family can be found at [Ernie 4.5 Moe](./ernie4_5_moe.md).

 <div class="flex justify-center">
    <img src="https://ernie.baidu.com/blog/posts/ernie4.5/overview.png"/>
--- a/docs/source/en/model_doc/ernie4_5_moe.md
+++ b/docs/source/en/model_doc/ernie4_5_moe.md
@ -23,11 +23,11 @@ rendered properly in your Markdown viewer.
    </div>
 </div>

-# Ernie 4.5 MoE
+# Ernie 4.5 Moe

 ## Overview

-The Ernie 4.5 MoE model was released in the [Ernie 4.5 Model Family](https://ernie.baidu.com/blog/posts/ernie4.5/) release by baidu.
+The Ernie 4.5 Moe model was released in the [Ernie 4.5 Model Family](https://ernie.baidu.com/blog/posts/ernie4.5/) release by baidu.
 This family of models contains multiple different architectures and model sizes. This model in specific targets the base text
 model with mixture of experts (moe) - one with 21B total, 3B active parameters and another one with 300B total, 47B active parameters.
 It uses the standard [Llama](./llama.md) at its core combined with a specialized MoE based on [Mixtral](./mixtral.md) with additional shared
@ -167,17 +167,17 @@ This model was contributed by [Anton Vlasjuk](https://huggingface.co/AntonV).
 The original code can be found [here](https://github.com/PaddlePaddle/ERNIE).


-## Ernie4_5_MoEConfig
+## Ernie4_5_MoeConfig

-[[autodoc]] Ernie4_5_MoEConfig
+[[autodoc]] Ernie4_5_MoeConfig

-## Ernie4_5_MoEModel
+## Ernie4_5_MoeModel

-[[autodoc]] Ernie4_5_MoEModel
+[[autodoc]] Ernie4_5_MoeModel
    - forward

-## Ernie4_5_MoEForCausalLM
+## Ernie4_5_MoeForCausalLM

-[[autodoc]] Ernie4_5_MoEForCausalLM
+[[autodoc]] Ernie4_5_MoeForCausalLM
    - forward
    - generate
--- a/docs/source/en/model_doc/evolla.md
+++ b/docs/source/en/model_doc/evolla.md
@ -0,0 +1,95 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# Evolla
+
+## Overview
+
+The Evolla model was proposed in [Decoding the Molecular Language of Proteins with Evolla](https://doi.org/10.1101/2025.01.05.630192) by [Zhou et al.](https://doi.org/10.1101/2025.01.05.630192).
+
+Evolla is an advanced 80-billion-parameter protein-language generative model designed to decode the molecular language of proteins. It integrates information from protein sequences, structures, and user queries to generate precise and contextually nuanced insights into protein function. Trained on an unprecedented AI-generated dataset of 546 million protein question-answer pairs and 150 billion word tokens, Evolla significantly advances research in proteomics and functional genomics, providing expert-level insights and shedding light on the molecular logic encoded in proteins.
+
+The abstract from the paper is the following:
+
+*Proteins, nature’s intricate molecular machines, are the products of billions of years of evolution and play fundamental roles in sustaining life. Yet, deciphering their molecular language - that is, understanding how protein sequences and structures encode and determine biological functions - remains a corner-stone challenge in modern biology. Here, we introduce Evolla, an 80 billion frontier protein-language generative model designed to decode the molecular language of proteins. By integrating information from protein sequences, structures, and user queries, Evolla generates precise and contextually nuanced insights into protein function. A key innovation of Evolla lies in its training on an unprecedented AI-generated dataset: 546 million protein question-answer pairs and 150 billion word tokens, designed to reflect the immense complexity and functional diversity of proteins. Post-pretraining, Evolla integrates Direct Preference Optimization (DPO) to refine the model based on preference signals and Retrieval-Augmented Generation (RAG) for external knowledge incorporation, improving response quality and relevance. To evaluate its performance, we propose a novel framework, Instructional Response Space (IRS), demonstrating that Evolla delivers expert-level insights, advancing research in proteomics and functional genomics while shedding light on the molecular logic encoded in proteins. The online demo is available at http://www.chat-protein.com/.*
+
+Examples:
+
+```python
+processor = EvollaProcessor.from_pretrained("westlake-repl/Evolla-10B-DPO-hf")
+model = EvollaForProteinText2Text.from_pretrained("westlake-repl/Evolla-10B-DPO-hf")
+# aa_seq should have same length as foldseek
+protein_inputs = [
+    {
+        
+        "aa_seq": "MATGGRRG...",
+        "foldseek": "###lqpfd...", # hashtag means the low-confidence foldseek tokens
+    },
+    {
+        "aa_seq": "MLPGLALL...",
+        "foldseek": "dfwwkwad...",
+    }
+]
+message_list = [
+    [
+        {
+            "role": "system",
+            "content": "You are an AI expert that can answer any questions about protein.",
+        },
+        {"role": "user", "content": "What is the function of this protein?"},
+    ],
+    [
+        {
+            "role": "system",
+            "content": "You are an AI expert that can answer any questions about protein.",
+        },
+        {"role": "user", "content": "What is the function of this protein?"},
+    ]
+]
+input_dict = processor(
+    protein_informations, messages_list, return_tensors="pt", text_max_length=512, protein_max_length=1024
+)
+with torch.no_grad():
+    generated_ids = hf_model.generate(**input_dict)
+generated_texts = processor.batch_decode(
+    generated_ids, skip_special_tokens=True
+)
+```
+
+Tips:
+
+- This model was contributed by [Xibin Bayes Zhou](https://huggingface.co/XibinBayesZhou).
+- The original code can be found [here](https://github.com/westlake-repl/Evolla).
+
+
+## EvollaConfig
+
+[[autodoc]] EvollaConfig
+
+## EvollaModel
+
+[[autodoc]] EvollaModel
+    - forward
+
+## EvollaForProteinText2Text
+
+[[autodoc]] EvollaForProteinText2Text
+    - forward
+
+## EvollaProcessor
+
+[[autodoc]] EvollaProcessor
+    - __call__
--- a/docs/source/en/model_doc/exaone4.md
+++ b/docs/source/en/model_doc/exaone4.md
@ -0,0 +1,208 @@
+<!--Copyright 2025 The LG AI Research and The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# EXAONE 4
+
+## Overview
+
+**[EXAONE 4.0](https://github.com/LG-AI-EXAONE/EXAONE-4.0)** model is the language model, which integrates a **Non-reasoning mode** and **Reasoning mode** to achieve both the excellent usability of [EXAONE 3.5](https://github.com/LG-AI-EXAONE/EXAONE-3.5) and the advanced reasoning abilities of [EXAONE Deep](https://github.com/LG-AI-EXAONE/EXAONE-Deep). To pave the way for the agentic AI era, EXAONE 4.0 incorporates essential features such as agentic tool use, and its multilingual capabilities are extended
+to support Spanish in addition to English and Korean. 
+
+The EXAONE 4.0 model series consists of two sizes: a mid-size **32B** model optimized for high performance, and a small-size **1.2B** model designed for on-device applications.
+
+In the EXAONE 4.0 architecture, we apply new architectural changes compared to previous EXAONE models as below:
+
+1. **Hybrid Attention**: For the 32B model, we adopt hybrid attention scheme, which combines *Local attention (sliding window attention)* with *Global attention (full attention)* in a 3:1 ratio. We do not use RoPE (Rotary Positional Embedding) for global attention for better global context understanding.
+2. **QK-Reorder-Norm**: We reorder the LayerNorm position from the traditional Pre-LN scheme by applying LayerNorm directly to the attention and MLP outputs, and we add RMS normalization right after the Q and K projection. It helps yield better performance on downstream tasks despite consuming more computation.
+
+For more details, please refer to our [technical report](https://arxiv.org/abs/2507.11407), [HuggingFace paper](https://huggingface.co/papers/2507.11407), [blog](https://www.lgresearch.ai/blog/view?seq=576), and [GitHub](https://github.com/LG-AI-EXAONE/EXAONE-4.0).
+
+All model weights including quantized versions are available at [Huggingface Collections](https://huggingface.co/collections/LGAI-EXAONE/exaone-40-686b2e0069800c835ed48375).
+
+
+## Model Details
+
+### Model Specifications
+
+| Model Configuration | 32B | 1.2B |
+|:-------------------|:-----:|:------:|
+| d_model | 5,120 | 2,048 |
+| Number of layers | 64 | 30 |
+| Normalization | QK-Reorder-LN | QK-Reorder-LN |
+| Non-linearity | SwiGLU | SwiGLU |
+| Feedforward dimension | 27,392 | 4,096 |
+| Attention type | Hybrid (3:1 Local-Global) | Global |
+| Head type | GQA | GQA |
+| Number of heads | 40 | 32 |
+| Number of KV heads | 8 | 8 |
+| Head size | 128 | 64 |
+| Max sequence length | 131,072 | 65,536 |
+| RoPE theta | 1,000,000 | 1,000,000 |
+| Tokenizer | BBPE | BBPE |
+| Vocab size | 102,400 | 102,400 |
+| Tied word embedding | False | True |
+| Knowledge cut-off | Nov. 2024 | Nov. 2024 |
+
+
+## Usage tips
+
+### Non-reasoning mode
+
+For general use, you can use the EXAONE 4.0 models with the following example:
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "LGAI-EXAONE/EXAONE-4.0-32B"
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="bfloat16",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+# choose your prompt
+prompt = "Explain how wonderful you are"
+prompt = "Explica lo increíble que eres"
+prompt = "너가 얼마나 대단한지 설명해 봐"
+
+messages = [
+    {"role": "user", "content": prompt}
+]
+input_ids = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt"
+)
+
+output = model.generate(
+    input_ids.to(model.device),
+    max_new_tokens=128,
+    do_sample=False,
+)
+print(tokenizer.decode(output[0]))
+```
+
+### Reasoning mode
+
+The EXAONE 4.0 models have reasoning capabilities for handling complex problems. You can activate reasoning mode by using the `enable_thinking=True` argument with the tokenizer, which opens a reasoning block that starts with `<think>` tag without closing it.
+
+```python
+messages = [
+    {"role": "user", "content": "Which one is bigger, 3.12 vs 3.9?"}
+]
+input_ids = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt",
+    enable_thinking=True,
+)
+
+output = model.generate(
+    input_ids.to(model.device),
+    max_new_tokens=128,
+    do_sample=True,
+    temperature=0.6,
+    top_p=0.95
+)
+print(tokenizer.decode(output[0]))
+```
+
+> [!IMPORTANT]
+> The model generation with reasoning mode can be affected sensitively by sampling parameters, so please refer to the [Usage Guideline](https://github.com/LG-AI-EXAONE/EXAONE-4.0#usage-guideline) on official GitHub page for better quality.
+
+### Agentic tool use
+
+The EXAONE 4.0 models can be used as agents with their tool calling capabilities. You can provide tool schemas to the model for effective tool calling.
+
+```python
+import random
+
+def roll_dice(max_num: int):
+    return random.randint(1, max_num)
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "roll_dice",
+            "description": "Roll a dice with the number 1 to N. User can select the number N.",
+            "parameters": {
+                "type": "object",
+                "required": ["max_num"],
+                "properties": {
+                    "max_num": {
+                        "type": "int",
+                        "description": "Max number of the dice"
+                    }
+                }
+            }
+        }
+    }
+]
+
+messages = [
+    {"role": "user", "content": "Roll D6 dice twice!"}
+]
+input_ids = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt",
+    tools=tools,
+)
+
+output = model.generate(
+    input_ids.to(model.device),
+    max_new_tokens=1024,
+    do_sample=True,
+    temperature=0.6,
+    top_p=0.95,
+)
+print(tokenizer.decode(output[0]))
+```
+
+## Exaone4Config
+
+[[autodoc]] Exaone4Config
+
+## Exaone4Model
+
+[[autodoc]] Exaone4Model
+    - forward
+
+## Exaone4ForCausalLM
+
+[[autodoc]] Exaone4ForCausalLM
+    - forward
+
+## Exaone4ForSequenceClassification
+
+[[autodoc]] Exaone4ForSequenceClassification
+    - forward
+
+## Exaone4ForTokenClassification
+
+[[autodoc]] Exaone4ForTokenClassification
+    - forward
+
+## Exaone4ForQuestionAnswering
+
+[[autodoc]] Exaone4ForQuestionAnswering
+    - forward
--- a/docs/source/en/model_doc/granitemoehybrid.md
+++ b/docs/source/en/model_doc/granitemoehybrid.md
@ -48,6 +48,32 @@ for i in output:

 This HF implementation is contributed by [Sukriti Sharma](https://huggingface.co/SukritiSharma) and [Alexander Brooks](https://huggingface.co/abrooks9944).

+## Notes
+
+- `GraniteMoeHybridForCausalLM` supports padding-free training which concatenates distinct training examples while still processing inputs as separate batches. It can significantly accelerate inference by [~2x](https://github.com/huggingface/transformers/pull/35861#issue-2807873129) (depending on model and data distribution) and reduce memory-usage if there are examples of varying lengths by avoiding unnecessary compute and memory overhead from padding tokens.
+
+  Padding-free training requires the `flash-attn`, `mamba-ssm`, and `causal-conv1d` packages and the following arguments must be passed to the model in addition to `input_ids` and `labels`.
+
+  - `position_ids: torch.LongTensor`: the position index of each token in each sequence.
+  - `seq_idx: torch.IntTensor`: the index of each sequence in the batch.
+  - Each of the [`FlashAttentionKwargs`]
+    - `cu_seq_lens_q: torch.LongTensor`: the cumulative sequence lengths of all queries.
+    - `cu_seq_lens_k: torch.LongTensor`: the cumulative sequence lengths of all keys.
+    - `max_length_q: int`: the longest query length in the batch.
+    - `max_length_k: int`: the longest key length in the batch.
+
+  The `attention_mask` inputs should not be provided. The [`DataCollatorWithFlattening`] programmatically generates the set of additional arguments above using `return_seq_idx=True` and `return_flash_attn_kwargs=True`. See the [Improving Hugging Face Training Efficiency Through Packing with Flash Attention](https://huggingface.co/blog/packing-with-FA2) blog post for additional information.
+
+  ```python
+  from transformers import DataCollatorWithFlattening
+
+  # Example of using padding-free training
+  data_collator = DataCollatorWithFlattening(
+      tokenizer=tokenizer,
+      return_seq_idx=True,
+      return_flash_attn_kwargs=True
+  )
+  ```

 ## GraniteMoeHybridConfig

@ -61,4 +87,4 @@ This HF implementation is contributed by [Sukriti Sharma](https://huggingface.co
 ## GraniteMoeHybridForCausalLM

 [[autodoc]] GraniteMoeHybridForCausalLM
-    - forward
+    - forward
--- a/docs/source/en/model_doc/segformer.md
+++ b/docs/source/en/model_doc/segformer.md
@ -128,6 +128,12 @@ If you're interested in submitting a resource to be included here, please feel f
    - preprocess
    - post_process_semantic_segmentation

+## SegformerImageProcessorFast
+
+[[autodoc]] SegformerImageProcessorFast
+    - preprocess
+    - post_process_semantic_segmentation
+
 <frameworkcontent>
 <pt>

@ -175,4 +181,4 @@ If you're interested in submitting a resource to be included here, please feel f
    - call

 </tf>
-</frameworkcontent>
+</frameworkcontent>
--- a/docs/source/en/model_doc/superpoint.md
+++ b/docs/source/en/model_doc/superpoint.md
@ -130,6 +130,11 @@ processed_outputs = processor.post_process_keypoint_detection(outputs, [image_si

 [[autodoc]] SuperPointImageProcessor

+- preprocess
+
+## SuperPointImageProcessorFast
+
+[[autodoc]] SuperPointImageProcessorFast
 - preprocess
 - post_process_keypoint_detection

--- a/docs/source/en/model_doc/voxtral.md
+++ b/docs/source/en/model_doc/voxtral.md
@ -37,7 +37,11 @@ Voxtral builds on Ministral-3B by adding audio processing capabilities:

 ## Usage

-Let's first load the model!
+### Audio Instruct Mode
+
+The model supports audio-text instructions, including multi-turn and multi-audio interactions, all processed in batches.
+
+➡️ audio + text instruction
 ```python
 from transformers import VoxtralForConditionalGeneration, AutoProcessor
 import torch
@ -47,14 +51,7 @@ repo_id = "mistralai/Voxtral-Mini-3B-2507"

 processor = AutoProcessor.from_pretrained(repo_id)
 model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
-```

-### Audio Instruct Mode
-
-The model supports audio-text instructions, including multi-turn and multi-audio interactions, all processed in batches.
-
-➡️ audio + text instruction
-```python
 conversation = [
    {
        "role": "user",
@ -82,6 +79,15 @@ print("=" * 80)

 ➡️ multi-audio + text instruction 
 ```python
+from transformers import VoxtralForConditionalGeneration, AutoProcessor
+import torch
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+repo_id = "mistralai/Voxtral-Mini-3B-2507"
+
+processor = AutoProcessor.from_pretrained(repo_id)
+model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
+
 conversation = [
    {
        "role": "user",
@ -113,6 +119,15 @@ print("=" * 80)

 ➡️ multi-turn:
 ```python
+from transformers import VoxtralForConditionalGeneration, AutoProcessor
+import torch
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+repo_id = "mistralai/Voxtral-Mini-3B-2507"
+
+processor = AutoProcessor.from_pretrained(repo_id)
+model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
+
 conversation = [
    {
        "role": "user",
@ -158,6 +173,15 @@ print("=" * 80)

 ➡️ text only:
 ```python
+from transformers import VoxtralForConditionalGeneration, AutoProcessor
+import torch
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+repo_id = "mistralai/Voxtral-Mini-3B-2507"
+
+processor = AutoProcessor.from_pretrained(repo_id)
+model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
+
 conversation = [
    {
        "role": "user",
@ -184,6 +208,15 @@ print("=" * 80)

 ➡️ audio only:
 ```python
+from transformers import VoxtralForConditionalGeneration, AutoProcessor
+import torch
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+repo_id = "mistralai/Voxtral-Mini-3B-2507"
+
+processor = AutoProcessor.from_pretrained(repo_id)
+model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
+
 conversation = [
    {
        "role": "user",
@ -210,6 +243,15 @@ print("=" * 80)

 ➡️ batched inference!
 ```python
+from transformers import VoxtralForConditionalGeneration, AutoProcessor
+import torch
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+repo_id = "mistralai/Voxtral-Mini-3B-2507"
+
+processor = AutoProcessor.from_pretrained(repo_id)
+model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
+
 conversations = [
    [
        {
@ -262,7 +304,16 @@ for decoded_output in decoded_outputs:
 Use the model to transcribe audio (supports English, Spanish, French, Portuguese, Hindi, German, Dutch, Italian)!

 ```python
-inputs = processor.apply_transcrition_request(language="en", audio="https://huggingface.co/datasets/hf-internal-testing/dummy-audio-samples/resolve/main/obama.mp3")
+from transformers import VoxtralForConditionalGeneration, AutoProcessor
+import torch
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+repo_id = "mistralai/Voxtral-Mini-3B-2507"
+
+processor = AutoProcessor.from_pretrained(repo_id)
+model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
+
+inputs = processor.apply_transcription_request(language="en", audio="https://huggingface.co/datasets/hf-internal-testing/dummy-audio-samples/resolve/main/obama.mp3", model_id=repo_id)
 inputs = inputs.to(device, dtype=torch.bfloat16)

 outputs = model.generate(**inputs, max_new_tokens=500)
--- a/docs/source/en/model_doc/xlstm.md
+++ b/docs/source/en/model_doc/xlstm.md
@ -0,0 +1,47 @@
+<!--Copyright 2025 NXAI GmbH. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+
+# xLSTM
+
+## Overview
+
+The xLSTM model was proposed in [xLSTM: Extended Long Short-Term Memory](https://openreview.net/forum?id=ARAxPPIAhq) by Maximilian Beck*, Korbinian Pöppel*, Markus Spanring, Andreas Auer, Oleksandra Prudnikova, Michael Kopp, Günter Klambauer, Johannes Brandstetter and Sepp Hochreiter.
+xLSTM updates the original LSTM architecture to be competitive with Transformer models by introducing exponential gating, matrix memory expansion, and parallelizable training and ingestion.
+
+The [7B model](https://hf.co/NX-AI/xLSTM-7b) variant was trained by the xLSTM team Maximilian Beck, Korbinian Pöppel, Phillip Lippe, Richard Kurle, Patrick Blies, Sebastian Böck and Sepp Hochreiter at NXAI.
+
+The abstract from the paper is the following:
+
+*In the 1990s, the constant error carousel and gating were introduced as the central ideas of the Long Short-Term Memory (LSTM). Since then, LSTMs have stood the test of time and contributed to numerous deep learning success stories, in particular they constituted the first Large Language Models (LLMs). However, the advent of the Transformer technology with parallelizable self-attention at its core marked the dawn of a new era, outpacing LSTMs at scale. We now raise a simple question: How far do we get in language modeling when scaling LSTMs to billions of parameters, leveraging the latest techniques from modern LLMs, but mitigating known limitations of LSTMs? Firstly, we introduce exponential gating with appropriate normalization and stabilization techniques. Secondly, we modify the LSTM memory structure, obtaining: (i) sLSTM with a scalar memory, a scalar update, and new memory mixing, (ii) mLSTM that is fully parallelizable with a matrix memory and a covariance update rule. Integrating these LSTM extensions into residual block backbones yields xLSTM blocks that are then residually stacked into xLSTM architectures. Exponential gating and modified memory structures boost xLSTM capabilities to perform favorably when compared to state-of-the-art Transformers and State Space Models, both in performance and scaling.*
+
+This model was contributed by [NX-AI](https://huggingface.co/NX-AI).
+The original code can be found [here](https://github.com/NX-AI/xlstm).
+
+
+## xLSTMConfig
+
+[[autodoc]] xLSTMConfig
+
+## xLSTMModel
+
+[[autodoc]] xLSTMModel
+    - forward
+
+## xLSTMLMHeadModel
+
+[[autodoc]] xLSTMForCausalLM
+    - forward
--- a/docs/source/en/model_sharing.md
+++ b/docs/source/en/model_sharing.md
@ -28,7 +28,7 @@ To share a model to the Hub, you need a Hugging Face [account](https://hf.co/joi
 <hfoption id="huggingface-CLI">

 ```bash
-huggingface-cli login
+hf auth login
 ```

 </hfoption>
--- a/docs/source/en/quicktour.md
+++ b/docs/source/en/quicktour.md
@ -49,7 +49,7 @@ notebook_login()
 Make sure the [huggingface_hub[cli]](https://huggingface.co/docs/huggingface_hub/guides/cli#getting-started) package is installed and run the command below. Paste your User Access Token when prompted to log in.

 ```bash
-huggingface-cli login
+hf auth login
 ```

 </hfoption>
--- a/docs/source/en/tasks/semantic_segmentation.md
+++ b/docs/source/en/tasks/semantic_segmentation.md
@ -289,7 +289,7 @@ You could also create and use your own dataset if you prefer to train with the [
          }
     )

-     # step 3: push to Hub (assumes you have ran the huggingface-cli login command in a terminal/notebook)
+     # step 3: push to Hub (assumes you have ran the hf auth login command in a terminal/notebook)
     dataset.push_to_hub("your-name/dataset-repo")

     # optionally, you can push to a private repo on the Hub
--- a/docs/source/es/custom_models.md
+++ b/docs/source/es/custom_models.md
@ -285,7 +285,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
 Ahora, para enviar el modelo al Hub, asegúrate de haber iniciado sesión. Ejecuta en tu terminal:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 o desde un _notebook_:
--- a/docs/source/es/model_sharing.md
+++ b/docs/source/es/model_sharing.md
@ -56,7 +56,7 @@ Los archivos son editados fácilmente dentro de un repositorio. Incluso puedes o
 Antes de compartir un modelo al Hub necesitarás tus credenciales de Hugging Face. Si tienes acceso a una terminal ejecuta el siguiente comando en el entorno virtual donde 🤗 Transformers esté instalado. Esto guardará tu token de acceso dentro de tu carpeta cache de Hugging Face (~/.cache/ by default):

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Si usas un notebook como Jupyter o Colaboratory, asegúrate de tener instalada la biblioteca [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library). Esta biblioteca te permitirá interactuar por código con el Hub.
--- a/docs/source/es/run_scripts.md
+++ b/docs/source/es/run_scripts.md
@ -324,7 +324,7 @@ python examples/pytorch/summarization/run_summarization.py
 Todos los scripts pueden cargar tu modelo final en el [Model Hub](https://huggingface.co/models). Asegúrate de haber iniciado sesión en Hugging Face antes de comenzar:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Luego agrega el argumento `push_to_hub` al script. Este argumento creará un repositorio con tu nombre de usuario Hugging Face y el nombre de la carpeta especificado en `output_dir`.
--- a/docs/source/fr/run_scripts_fr.md
+++ b/docs/source/fr/run_scripts_fr.md
@ -327,7 +327,7 @@ python examples/pytorch/summarization/run_summarization.py
 Tous les scripts peuvent télécharger votre modèle final sur le Model Hub. Assurez-vous que vous êtes connecté à Hugging Face avant de commencer :

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Ensuite, ajoutez l'argument `push_to_hub` au script. Cet argument créera un dépôt avec votre nom d'utilisateur Hugging Face et le nom du dossier spécifié dans `output_dir`.
--- a/docs/source/it/custom_models.md
+++ b/docs/source/it/custom_models.md
@ -285,7 +285,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
 Adesso, per inviare il modello all'Hub, assicurati di aver effettuato l'accesso. Lancia dal tuo terminale:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 O da un notebook:
--- a/docs/source/it/model_sharing.md
+++ b/docs/source/it/model_sharing.md
@ -56,7 +56,7 @@ Anche i file possono essere modificati facilmente in un repository ed è possibi
 Prima di condividere un modello nell'Hub, hai bisogno delle tue credenziali di Hugging Face. Se hai accesso ad un terminale, esegui il seguente comando nell'ambiente virtuale in cui è installata la libreria 🤗 Transformers. Questo memorizzerà il tuo token di accesso nella cartella cache di Hugging Face (di default `~/.cache/`):

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Se stai usando un notebook come Jupyter o Colaboratory, assicurati di avere la libreria [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library) installata. Questa libreria ti permette di interagire in maniera programmatica con l'Hub.
--- a/docs/source/it/run_scripts.md
+++ b/docs/source/it/run_scripts.md
@ -324,7 +324,7 @@ python examples/pytorch/summarization/run_summarization.py
 Tutti gli script possono caricare il tuo modello finale al [Model Hub](https://huggingface.co/models). Prima di iniziare, assicurati di aver effettuato l'accesso su Hugging Face:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Poi, aggiungi l'argomento `push_to_hub` allo script. Questo argomento consentirà di creare un repository con il tuo username Hugging Face e la cartella specificata in `output_dir`.
--- a/docs/source/ja/custom_models.md
+++ b/docs/source/ja/custom_models.md
@ -270,7 +270,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
 モデルをHubに送信するには、ログインしていることを確認してください。ターミナルで次のコマンドを実行します：

 ```bash
-huggingface-cli login
+hf auth login
 ```

 またはノートブックから：
--- a/docs/source/ja/model_sharing.md
+++ b/docs/source/ja/model_sharing.md
@ -56,7 +56,7 @@ Model Hubの組み込みバージョニングはgitおよび[git-lfs](https://gi
 モデルをHubに共有する前に、Hugging Faceの認証情報が必要です。ターミナルへのアクセス権がある場合、🤗 Transformersがインストールされている仮想環境で以下のコマンドを実行します。これにより、アクセストークンがHugging Faceのキャッシュフォルダに保存されます（デフォルトでは `~/.cache/` に保存されます）：

 ```bash
-huggingface-cli login
+hf auth login
 ```

 JupyterやColaboratoryのようなノートブックを使用している場合、[`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library)ライブラリがインストールされていることを確認してください。
--- a/docs/source/ja/run_scripts.md
+++ b/docs/source/ja/run_scripts.md
@ -337,7 +337,7 @@ python examples/pytorch/summarization/run_summarization.py
 すべてのスクリプトは、最終的なモデルを [Model Hub](https://huggingface.co/models) にアップロードできます。開始する前に Hugging Face にログインしていることを確認してください。

 ```bash
-huggingface-cli login
+hf auth login
 ```

 次に、スクリプトに `push_to_hub` 引数を追加します。この引数は、Hugging Face のユーザー名と `output_dir` で指定したフォルダ名でリポジトリを作成します。
--- a/docs/source/ko/_toctree.yml
+++ b/docs/source/ko/_toctree.yml
--- a/docs/source/ko/accelerator_selection.md
+++ b/docs/source/ko/accelerator_selection.md
@ -14,6 +14,8 @@ rendered properly in your Markdown viewer.

 -->

+<!-- TODO: this was copied from the korean version of `gpu_selection.md`, and is not up to date with the english version of `accelerator_selection.md` -->
+
 # GPU 선택하기 [[gpu-selection]]

 분산 학습 과정에서 사용할 GPU의 개수와 순서를 정할 수 있습니다. 이 방법은 서로 다른 연산 성능을 가진 GPU가 있을 때 더 빠른 GPU를 우선적으로 사용하거나, 사용 가능한 GPU 중 일부만 선택하여 활용하고자 할 때 유용합니다. 이 선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)과 [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html)에서 모두 작동합니다. Accelerate나 [DeepSpeed 통합](./main_classes/deepspeed)은 필요하지 않습니다.
@ -93,4 +95,4 @@ export CUDA_DEVICE_ORDER=FASTEST_FIRST

 The `CUDA_DEVICE_ORDER` is especially useful if your training setup consists of an older and newer GPU, where the older GPU appears first, but you cannot physically swap the cards to make the newer GPU appear first. In this case, set `CUDA_DEVICE_ORDER=FASTEST_FIRST` to always use the newer and faster GPU first (`nvidia-smi` or `rocm-smi` still reports the GPUs in their PCIe order). Or you could also set `export CUDA_VISIBLE_DEVICES=1,0`.

-`CUDA_DEVICE_ORDER`는 구형 GPU와 신형 GPU가 혼합된 환경에서 특히 유용합니다. 예를 들어, 구형 GPU가 먼저 표시되지만 물리적으로 교체할 수 없는 경우, `CUDA_DEVICE_ORDER=FASTEST_FIRST`를 설정하면 항상 신형 및 더 빠른 GPU를 우선적으로 사용(nvidia-smi 또는 rocm-smi는 PCIe 순서대로 GPU를 표시함)할 수 있습니다. 또는, `export CUDA_VISIBLE_DEVICES=1,0`을 설정하여 GPU 사용 순서를 직접 지정할 수도 있습니다.
+`CUDA_DEVICE_ORDER`는 구형 GPU와 신형 GPU가 혼합된 환경에서 특히 유용합니다. 예를 들어, 구형 GPU가 먼저 표시되지만 물리적으로 교체할 수 없는 경우, `CUDA_DEVICE_ORDER=FASTEST_FIRST`를 설정하면 항상 신형 및 더 빠른 GPU를 우선적으로 사용(nvidia-smi 또는 rocm-smi는 PCIe 순서대로 GPU를 표시함)할 수 있습니다. 또는, `export CUDA_VISIBLE_DEVICES=1,0`을 설정하여 GPU 사용 순서를 직접 지정할 수도 있습니다.
--- a/docs/source/ko/attention.md
+++ b/docs/source/ko/attention.md
@ -1,54 +0,0 @@
-<!--Copyright 2023 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# 어텐션 메커니즘[[attention_mechanisms]]
-
-대부분의 트랜스포머 모델은 정방행렬인 전체 어텐션을 사용합니다. 
-하지만 이는 긴 텍스트를 다룰 때는 큰 계산 병목 현상을 유발할 수 있습니다. 
-`Longformer`와 `Reformer`는 훈련 속도를 높이기 위해 어텐션 행렬의 희소 버전을 사용하여 효율을 높이려는 모델입니다.
-
-## LSH 어텐션[[lsh_attention]]
-
-
-[Reformer](model_doc/reformer)는 LSH(Locality Sensitive Hashing) 어텐션을 사용합니다. softmax(QK^t)에서는 행렬 QK^t의 (softmax 차원에서) 가장 큰 요소들만 유용한 기여를 할 것입니다. 
-따라서 각각의 쿼리 q에 대해, q와 가까운 키 k만 고려할 수 있습니다. 해시 함수는 q와 k가 가까운지 여부를 결정하는 데 사용됩니다. 
-어텐션 마스크는 현재 토큰을 마스킹하여 변경됩니다. 이 때 첫 번째 위치의 토큰은 제외합니다. 왜냐하면 쿼리와 키가 동일한 값을 갖게 되기 때문입니다(서로 매우 유사함). 
-해시는 약간의 무작위성을 가질 수 있으므로, 실제로는 여러 개의 해시 함수가 사용되고 (`n_rounds` 매개변수에 의해 결정됨) 그 후에 평균값을 취하게 됩니다.
-
-## 지역 어텐션[[local_attention]]
-
-[Longformer](model_doc/longformer)는 지역 어텐션을 사용합니다. 종종 특정 토큰에 대해 지역 컨텍스트(예: 왼쪽과 오른쪽에 있는 두 개의 토큰은 무엇인가요?)만으로도 작업을 수행하는데 충분합니다. 
-또한 작은 창(window)을 가진 어텐션 레이어를 쌓음으로써 마지막 레이어는 창 내의 토큰뿐만 아니라 더 많은 수의 토큰에 대한 수용 영역(receptive field)을 갖게 되어 전체 문장의 표현을 구축할 수 있습니다.
-
-사전에 선택된 일부 입력 토큰들은 전역 어텐션을 받습니다. 이 몇 개의 토큰에 대해서는 어텐션 행렬이 모든 토큰에 접근할 수 있으며, 이 과정은 대칭적으로 이루어집니다. 
-다른 모든 토큰들은 로컬 창 내의 토큰들에 더해 해당 특정 토큰들에도 접근할 수 있습니다. 이는 논문의 Figure 2d에서 나타나며, 아래에 샘플 어텐션 마스크가 제시되어 있습니다:
-
-
-<div class="flex justify-center">
-    <img scale="50 %" align="center" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/local_attention_mask.png"/>
-</div>
-
-
-적은 파라미터의 어텐션 행렬을 사용하면 모델이 더 큰 시퀀스 입력 길이를 가질 수 있습니다.
-
-## 다른 방법들[[other_tricks]]
-
-### 축별 위치 인코딩[[axial_positional_encodings]]
-
-[Reformer](model_doc/reformer)는 축별 위치 인코딩(axial positional encodings)을 사용합니다. 기존의 트랜스포머 모델에서는 위치 인코딩 행렬 E는 크기가 \\(l \times d\\)인 행렬이며, 
-여기서 \\(l\\)은 시퀀스 길이(sequence length)이고 \\(d\\)는 숨겨진 상태(hidden state)의 차원입니다. 매우 긴 텍스트의 경우, 이 행렬은 매우 크며 GPU 상에서 공간을 많이 차지할 수 있습니다. 
-이를 완화하기 위해, 축별 위치 인코딩은 큰 행렬 E를 두 개의 작은 행렬 E1과 E2로 분해합니다. 이때 E1의 크기는 \\(l_{1} \times d_{1}\\)이고, E2의 크기는 \\(l_{2} \times d_{2}\\)입니다. 
-이때 \\(l_{1} \times l_{2} = l\\)이고 \\(d_{1} + d_{2} = d\\)(길이에 대한 곱셈 연산을 사용하면 훨씬 작아집니다). E의 시간 단계 j에 대한 임베딩은 E1에서 시간 단계 \\(j \% l1\\)의 임베딩과 E2에서 시간 단계  \\(j // l1\\)의 임베딩을 연결하여 얻습니다.
--- a/docs/source/ko/autoclass_tutorial.md
+++ b/docs/source/ko/autoclass_tutorial.md
@ -1,144 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# AutoClass로 사전 학습된 인스턴스 로드[[load-pretrained-instances-with-an-autoclass]]
-
-트랜스포머 아키텍처가 매우 다양하기 때문에 체크포인트에 맞는 아키텍처를 생성하는 것이 어려울 수 있습니다. 라이브러리를 쉽고 간단하며 유연하게 사용하기 위한 Transformer 핵심 철학의 일환으로, `AutoClass`는 주어진 체크포인트에서 올바른 아키텍처를 자동으로 추론하여 로드합니다. `from_pretrained()` 메서드를 사용하면 모든 아키텍처에 대해 사전 학습된 모델을 빠르게 로드할 수 있으므로 모델을 처음부터 학습하는 데 시간과 리소스를 투입할 필요가 없습니다. 
-체크포인트에 구애받지 않는 코드를 생성한다는 것은 코드가 한 체크포인트에서 작동하면 아키텍처가 다르더라도 다른 체크포인트(유사한 작업에 대해 학습된 경우)에서도 작동한다는 것을 의미합니다.
-
-<Tip>
-
-아키텍처는 모델의 골격을 의미하며 체크포인트는 주어진 아키텍처에 대한 가중치입니다. 예를 들어, [BERT](https://huggingface.co/google-bert/bert-base-uncased)는 아키텍처이고, `google-bert/bert-base-uncased`는 체크포인트입니다. 모델은 아키텍처 또는 체크포인트를 의미할 수 있는 일반적인 용어입니다.
-
-</Tip>
-
-이 튜토리얼에서는 다음을 학습합니다:
-
-* 사전 학습된 토크나이저 로드하기.
-* 사전 학습된 이미지 프로세서 로드하기.
-* 사전 학습된 특징 추출기 로드하기.
-* 사전 훈련된 프로세서 로드하기.
-* 사전 학습된 모델 로드하기.
-
-## AutoTokenizer[[autotokenizer]]
-
-거의 모든 NLP 작업은 토크나이저로 시작됩니다. 토크나이저는 사용자의 입력을 모델에서 처리할 수 있는 형식으로 변환합니다.
-[`AutoTokenizer.from_pretrained`]로 토크나이저를 로드합니다:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
-```
-
-그리고 아래와 같이 입력을 토큰화합니다:
-
-```py
->>> sequence = "In a hole in the ground there lived a hobbit."
->>> print(tokenizer(sequence))
-{'input_ids': [101, 1999, 1037, 4920, 1999, 1996, 2598, 2045, 2973, 1037, 7570, 10322, 4183, 1012, 102], 
- 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 
- 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
-```
-
-## AutoImageProcessor[[autoimageprocessor]]
-
-비전 작업의 경우 이미지 프로세서가 이미지를 올바른 입력 형식으로 처리합니다.
-
-```py
->>> from transformers import AutoImageProcessor
-
->>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
-```
-
-
-## AutoFeatureExtractor[[autofeatureextractor]]
-
-오디오 작업의 경우 특징 추출기가 오디오 신호를 올바른 입력 형식으로 처리합니다.
-
-[`AutoFeatureExtractor.from_pretrained`]로 특징 추출기를 로드합니다:
-
-```py
->>> from transformers import AutoFeatureExtractor
-
->>> feature_extractor = AutoFeatureExtractor.from_pretrained(
-...     "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
-... )
-```
-
-## AutoProcessor[[autoprocessor]]
-
-멀티모달 작업에는 두 가지 유형의 전처리 도구를 결합한 프로세서가 필요합니다. 예를 들어 LayoutLMV2 모델에는 이미지를 처리하는 이미지 프로세서와 텍스트를 처리하는 토크나이저가 필요하며, 프로세서는 이 두 가지를 결합합니다.
-
-[`AutoProcessor.from_pretrained()`]로 프로세서를 로드합니다:
-
-```py
->>> from transformers import AutoProcessor
-
->>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
-```
-
-## AutoModel[[automodel]]
-
-<frameworkcontent>
-<pt>
-마지막으로 AutoModelFor클래스를 사용하면 주어진 작업에 대해 미리 학습된 모델을 로드할 수 있습니다 (사용 가능한 작업의 전체 목록은 [여기](model_doc/auto)를 참조하세요). 예를 들어, [`AutoModelForSequenceClassification.from_pretrained`]를 사용하여 시퀀스 분류용 모델을 로드할 수 있습니다:
-
-```py
->>> from transformers import AutoModelForSequenceClassification
-
->>> model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-동일한 체크포인트를 쉽게 재사용하여 다른 작업에 아키텍처를 로드할 수 있습니다:
-
-```py
->>> from transformers import AutoModelForTokenClassification
-
->>> model = AutoModelForTokenClassification.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-<Tip warning={true}>
-
-PyTorch모델의 경우 `from_pretrained()` 메서드는 내부적으로 피클을 사용하여 안전하지 않은 것으로 알려진 `torch.load()`를 사용합니다. 
-일반적으로 신뢰할 수 없는 소스에서 가져왔거나 변조되었을 수 있는 모델은 로드하지 마세요. 허깅 페이스 허브에서 호스팅되는 공개 모델의 경우 이러한 보안 위험이 부분적으로 완화되며, 각 커밋 시 멀웨어를 [검사합니다](https://huggingface.co/docs/hub/security-malware). GPG를 사용해 서명된 [커밋 검증](https://huggingface.co/docs/hub/security-gpg#signing-commits-with-gpg)과 같은 모범사례는 [문서](https://huggingface.co/docs/hub/security)를 참조하세요.
-
-텐서플로우와 Flax 체크포인트는 영향을 받지 않으며, `from_pretrained`메서드에 `from_tf` 와 `from_flax` 키워드 가변 인자를 사용하여 이 문제를 우회할 수 있습니다.
-
-</Tip>
-
-일반적으로 AutoTokenizer 클래스와 AutoModelFor 클래스를 사용하여 미리 학습된 모델 인스턴스를 로드하는 것이 좋습니다. 이렇게 하면 매번 올바른 아키텍처를 로드할 수 있습니다. 다음 [튜토리얼](preprocessing)에서는 새롭게 로드한 토크나이저, 이미지 프로세서, 특징 추출기를 사용하여 미세 튜닝용 데이터 세트를 전처리하는 방법에 대해 알아봅니다.
-</pt>
-<tf>
-마지막으로 `TFAutoModelFor` 클래스를 사용하면 주어진 작업에 대해 사전 훈련된 모델을 로드할 수 있습니다. (사용 가능한 작업의 전체 목록은 [여기](model_doc/auto)를 참조하세요. 예를 들어, [`TFAutoModelForSequenceClassification.from_pretrained`]로 시퀀스 분류를 위한 모델을 로드합니다:
-
-```py
->>> from transformers import TFAutoModelForSequenceClassification
-
->>> model = TFAutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-쉽게 동일한 체크포인트를 재사용하여 다른 작업에 아키텍처를 로드할 수 있습니다:
-
-```py
->>> from transformers import TFAutoModelForTokenClassification
-
->>> model = TFAutoModelForTokenClassification.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-일반적으로, `AutoTokenizer`클래스와 `TFAutoModelFor` 클래스를 사용하여 미리 학습된 모델 인스턴스를 로드하는 것이 좋습니다. 이렇게 하면 매번 올바른 아키텍처를 로드할 수 있습니다. 다음 [튜토리얼](preprocessing)에서는 새롭게 로드한 토크나이저, 이미지 프로세서, 특징 추출기를 사용하여 미세 튜닝용 데이터 세트를 전처리하는 방법에 대해 알아봅니다.
-</tf>
-</frameworkcontent>
--- a/docs/source/ko/bertology.md
+++ b/docs/source/ko/bertology.md
@ -1,41 +0,0 @@
-<!--Copyright 2020 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# BERTology
-
-BERT와 같은 대규모 트랜스포머의 내부 동작을 조사하는 연구 분야가 점점 더 중요해지고 있습니다.
-혹자는 "BERTology"라 칭하기도 합니다. 이 분야의 좋은 예시는 다음과 같습니다:
-
-
- BERT는 고전적인 NLP 파이프라인의 재발견 - Ian Tenney, Dipanjan Das, Ellie Pavlick:
-  https://huggingface.co/papers/1905.05950
- 16개의 헤드가 정말로 1개보다 나은가? - Paul Michel, Omer Levy, Graham Neubig:
-  https://huggingface.co/papers/1905.10650
- BERT는 무엇을 보는가? BERT의 어텐션 분석 - Kevin Clark, Urvashi Khandelwal, Omer Levy, Christopher D. Manning:
-  https://huggingface.co/papers/1906.04341
- CAT-probing: 프로그래밍 언어에 대해 사전훈련된 모델이 어떻게 코드 구조를 보는지 알아보기 위한 메트릭 기반 접근 방법:
-  https://huggingface.co/papers/2210.04633
-
-우리는 이 새로운 연구 분야의 발전을 돕기 위해, BERT/GPT/GPT-2 모델에 내부 표현을 살펴볼 수 있는 몇 가지 기능을 추가했습니다.
-이 기능들은 주로 Paul Michel의 훌륭한 작업을 참고하여 개발되었습니다
-(https://huggingface.co/papers/1905.10650):
-
-
- BERT/GPT/GPT-2의 모든 은닉 상태에 접근하기,
- BERT/GPT/GPT-2의 각 헤드의 모든 어텐션 가중치에 접근하기,
- 헤드의 출력 값과 그래디언트를 검색하여 헤드 중요도 점수를 계산하고 https://huggingface.co/papers/1905.10650에서 설명된 대로 헤드를 제거하는 기능을 제공합니다.
-
-이러한 기능들을 이해하고 직접 사용해볼 수 있도록 [bertology.py](https://github.com/huggingface/transformers-research-projects/tree/main/bertology/run_bertology.py) 예제 스크립트를 추가했습니다. 이 예제 스크립트에서는 GLUE에 대해 사전훈련된 모델에서 정보를 추출하고 모델을 가지치기(prune)해봅니다.
--- a/docs/source/ko/big_models.md
+++ b/docs/source/ko/big_models.md
@ -1,122 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# 큰 모델 인스턴스화 [[instantiating-a-big-model]]
-
-매우 큰 사전훈련된 모델을 사용하려면, RAM 사용을 최소화해야 하는 과제가 있습니다. 일반적인 PyTorch 워크플로우는 다음과 같습니다:
-
-1. 무작위 가중치로 모델을 생성합니다.
-2. 사전훈련된 가중치를 불러옵니다.
-3. 사전훈련된 가중치를 무작위 모델에 적용합니다.
-
-1단계와 2단계 모두 모델의 전체 버전을 메모리에 적재해야 하며, 대부분 문제가 없지만 모델이 기가바이트급의 용량을 차지하기 시작하면 복사본 2개가 RAM을 초과하여 메모리 부족 이슈를 야기할 수 있습니다. 더 심각한 문제는 분산 학습을 위해 `torch.distributed`를 사용하는 경우, 프로세스마다 사전훈련된 모델을 로드하고 복사본을 2개씩 RAM에 저장한다는 것입니다.
-
-<Tip>
-
-무작위로 생성된 모델은 "비어 있는" (즉 그때 메모리에 있던 것으로 이뤄진) 텐서로 초기화되며 메모리 공간을 차지합니다. 초기화된 모델/파라미터의 종류에 적합한 분포(예: 정규 분포)에 따른 무작위 초기화는 가능한 한 빠르게 하기 위해 초기화되지 않은 가중치에 대해 3단계 이후에만 수행됩니다!
-
-</Tip>
-
-이 안내서에서는 Transformers가 이 문제를 해결하기 위해 제공하는 솔루션을 살펴봅니다. 주의할 점은 아직 활발히 개발 중인 분야이므로 여기서 설명하는 API가 앞으로 약간 변경될 수 있다는 것입니다.
-
-## 샤딩된 체크포인트 [[sharded-checkpoints]]
-
-4.18.0 버전 이후, 10GB 이상의 공간을 차지하는 모델 체크포인트는 자동으로 작은 조각들로 샤딩됩니다. `model.save_pretrained(save_dir)`를 실행할 때 하나의 단일 체크포인트를 가지게 될 대신, 여러 부분 체크포인트(각각의 크기는 10GB 미만)와 매개변수 이름을 해당 파일에 매핑하는 인덱스가 생성됩니다.
-
-`max_shard_size` 매개변수로 샤딩 전 최대 크기를 제어할 수 있으므로, 이 예제를 위해 샤드 크기가 작은 일반 크기의 모델을 사용하겠습니다: 전통적인 BERT 모델을 사용해 봅시다.
-
-```py
-from transformers import AutoModel
-
-model = AutoModel.from_pretrained("google-bert/bert-base-cased")
-```
-
-[`~PreTrainedModel.save_pretrained`]을 사용하여 모델을 저장하면, 모델의 구성과 가중치가 들어있는 두 개의 파일이 있는 새 폴더가 생성됩니다:
-
-```py
->>> import os
->>> import tempfile
-
->>> with tempfile.TemporaryDirectory() as tmp_dir:
-...     model.save_pretrained(tmp_dir)
-...     print(sorted(os.listdir(tmp_dir)))
-['config.json', 'pytorch_model.bin']
-```
-
-이제 최대 샤드 크기를 200MB로 사용해 봅시다:
-
-```py
->>> with tempfile.TemporaryDirectory() as tmp_dir:
-...     model.save_pretrained(tmp_dir, max_shard_size="200MB")
-...     print(sorted(os.listdir(tmp_dir)))
-['config.json', 'pytorch_model-00001-of-00003.bin', 'pytorch_model-00002-of-00003.bin', 'pytorch_model-00003-of-00003.bin', 'pytorch_model.bin.index.json']
-```
-
-모델의 구성에 더해, 세 개의 다른 가중치 파일과 파라미터 이름과 해당 파일의 매핑이 포함된 `index.json` 파일을 볼 수 있습니다. 이러한 체크포인트는 [`~PreTrainedModel.from_pretrained`] 메서드를 사용하여 완전히 다시 로드할 수 있습니다:
-
-```py
->>> with tempfile.TemporaryDirectory() as tmp_dir:
-...     model.save_pretrained(tmp_dir, max_shard_size="200MB")
-...     new_model = AutoModel.from_pretrained(tmp_dir)
-```
-
-큰 모델의 경우 이러한 방식으로 처리하는 주된 장점은 위에서 보여준 흐름의 2단계에서, 각 샤드가 이전 샤드 다음에 로드되므로 메모리 사용량이 모델 크기와 가장 큰 샤드의 크기를 초과하지 않는다는 점입니다.
-
-이 인덱스 파일은 키가 체크포인트에 있는지, 그리고 해당 가중치가 어디에 저장되어 있는지를 결정하는 데 사용됩니다. 이 인덱스를 json과 같이 로드하고 딕셔너리를 얻을 수 있습니다:
-
-```py
->>> import json
-
->>> with tempfile.TemporaryDirectory() as tmp_dir:
-...     model.save_pretrained(tmp_dir, max_shard_size="200MB")
-...     with open(os.path.join(tmp_dir, "pytorch_model.bin.index.json"), "r") as f:
-...         index = json.load(f)
-
->>> print(index.keys())
-dict_keys(['metadata', 'weight_map'])
-```
-
-메타데이터는 현재 모델의 총 크기만 포함됩니다. 앞으로 다른 정보를 추가할 계획입니다:
-
-```py
->>> index["metadata"]
-{'total_size': 433245184}
-```
-
-가중치 맵은 이 인덱스의 주요 부분으로, 각 매개변수 이름(PyTorch 모델 `state_dict`에서 보통 찾을 수 있는)을 해당 파일에 매핑합니다:
-
-```py
->>> index["weight_map"]
-{'embeddings.LayerNorm.bias': 'pytorch_model-00001-of-00003.bin',
- 'embeddings.LayerNorm.weight': 'pytorch_model-00001-of-00003.bin',
- ...
-```
-
-만약 [`~PreTrainedModel.from_pretrained`]를 사용하지 않고 모델 내에서 이러한 샤딩된 체크포인트를 직접 가져오려면 (전체 체크포인트를 위해 `model.load_state_dict()`를 수행하는 것처럼), [`~modeling_utils.load_sharded_checkpoint`]를 사용해야 합니다.
-
-```py
->>> from transformers.modeling_utils import load_sharded_checkpoint
-
->>> with tempfile.TemporaryDirectory() as tmp_dir:
-...     model.save_pretrained(tmp_dir, max_shard_size="200MB")
-...     load_sharded_checkpoint(model, tmp_dir)
-```
-
-## 저(低)메모리 로딩 [[low-memory-loading]]
-
-샤딩된 체크포인트는 위에서 언급한 작업 흐름의 2단계에서 메모리 사용량을 줄이지만, 저(低)메모리 설정에서 모델을 사용하기 위해 우리의 Accelerate 라이브러리를 기반으로 한 도구를 활용하는 것이 좋습니다.
-
-자세한 사항은 다음 가이드를 참조해주세요: [Accelerate로 대규모 모델 가져오기 (영문)](../en/main_classes/model#large-model-loading)
--- a/docs/source/ko/create_a_model.md
+++ b/docs/source/ko/create_a_model.md
@ -1,388 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# 맞춤형 아키텍처 만들기[[create-a-custom-architecture]]
-
-[`AutoClass`](model_doc/auto)는 모델 아키텍처를 자동으로 추론하고 미리 학습된 configuration과 가중치를 다운로드합니다. 일반적으로 체크포인트에 구애받지 않는 코드를 생성하려면 `AutoClass`를 사용하는 것이 좋습니다. 하지만 특정 모델 파라미터를 보다 세밀하게 제어하고자 하는 사용자는 몇 가지 기본 클래스만으로 커스텀 🤗 Transformers 모델을 생성할 수 있습니다. 이는 🤗 Transformers 모델을 연구, 교육 또는 실험하는 데 관심이 있는 모든 사용자에게 특히 유용할 수 있습니다. 이 가이드에서는 'AutoClass'를 사용하지 않고 커스텀 모델을 만드는 방법에 대해 알아보겠습니다:
-
- 모델 configuration을 가져오고 사용자 지정합니다.
- 모델 아키텍처를 생성합니다.
- 텍스트에 사용할 느리거나 빠른 토큰화기를 만듭니다.
- 비전 작업을 위한 이미지 프로세서를 생성합니다.
- 오디오 작업을 위한 특성 추출기를 생성합니다.
- 멀티모달 작업용 프로세서를 생성합니다.
-
-## Configuration[[configuration]]
-
-[configuration](main_classes/configuration)은 모델의 특정 속성을 나타냅니다. 각 모델 구성에는 서로 다른 속성이 있습니다. 예를 들어, 모든 NLP 모델에는 `hidden_size`, `num_attention_heads`, `num_hidden_layers` 및 `vocab_size` 속성이 공통으로 있습니다. 이러한 속성은 모델을 구성할 attention heads 또는 hidden layers의 수를 지정합니다.
-
-[DistilBERT](model_doc/distilbert) 속성을 검사하기 위해 [`DistilBertConfig`]에 접근하여 자세히 살펴봅니다:
-
-```py
->>> from transformers import DistilBertConfig
-
->>> config = DistilBertConfig()
->>> print(config)
-DistilBertConfig {
-  "activation": "gelu",
-  "attention_dropout": 0.1,
-  "dim": 768,
-  "dropout": 0.1,
-  "hidden_dim": 3072,
-  "initializer_range": 0.02,
-  "max_position_embeddings": 512,
-  "model_type": "distilbert",
-  "n_heads": 12,
-  "n_layers": 6,
-  "pad_token_id": 0,
-  "qa_dropout": 0.1,
-  "seq_classif_dropout": 0.2,
-  "sinusoidal_pos_embds": false,
-  "transformers_version": "4.16.2",
-  "vocab_size": 30522
-}
-```
-
-[`DistilBertConfig`]는 기본 [`DistilBertModel`]을 빌드하는 데 사용되는 모든 기본 속성을 표시합니다. 모든 속성은 커스터마이징이 가능하므로 실험을 위한 공간을 만들 수 있습니다. 예를 들어 기본 모델을 다음과 같이 커스터마이즈할 수 있습니다:
-
- `activation` 파라미터로 다른 활성화 함수를 사용해 보세요.
- `attention_dropout` 파라미터를 사용하여 어텐션 확률에 더 높은 드롭아웃 비율을 사용하세요.
-
-```py
->>> my_config = DistilBertConfig(activation="relu", attention_dropout=0.4)
->>> print(my_config)
-DistilBertConfig {
-  "activation": "relu",
-  "attention_dropout": 0.4,
-  "dim": 768,
-  "dropout": 0.1,
-  "hidden_dim": 3072,
-  "initializer_range": 0.02,
-  "max_position_embeddings": 512,
-  "model_type": "distilbert",
-  "n_heads": 12,
-  "n_layers": 6,
-  "pad_token_id": 0,
-  "qa_dropout": 0.1,
-  "seq_classif_dropout": 0.2,
-  "sinusoidal_pos_embds": false,
-  "transformers_version": "4.16.2",
-  "vocab_size": 30522
-}
-```
-
-사전 학습된 모델 속성은 [`~PretrainedConfig.from_pretrained`] 함수에서 수정할 수 있습니다:
-
-```py
->>> my_config = DistilBertConfig.from_pretrained("distilbert/distilbert-base-uncased", activation="relu", attention_dropout=0.4)
-```
-
-모델 구성이 만족스러우면 [`~PretrainedConfig.save_pretrained`]로 저장할 수 있습니다. 설정 파일은 지정된 작업 경로에 JSON 파일로 저장됩니다:
-
-```py
->>> my_config.save_pretrained(save_directory="./your_model_save_path")
-```
-
-configuration 파일을 재사용하려면 [`~PretrainedConfig.from_pretrained`]를 사용하여 가져오세요:
-
-```py
->>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/config.json")
-```
-
-<Tip>
-
-configuration 파일을 딕셔너리로 저장하거나 사용자 정의 configuration 속성과 기본 configuration 속성의 차이점만 저장할 수도 있습니다! 자세한 내용은 [configuration](main_classes/configuration) 문서를 참조하세요.
-
-</Tip>
-
-## 모델[[model]]
-
-다음 단계는 [모델(model)](main_classes/models)을 만드는 것입니다. 느슨하게 아키텍처라고도 불리는 모델은 각 계층이 수행하는 동작과 발생하는 작업을 정의합니다. configuration의 `num_hidden_layers`와 같은 속성은 아키텍처를 정의하는 데 사용됩니다. 모든 모델은 기본 클래스 [`PreTrainedModel`]과 입력 임베딩 크기 조정 및 셀프 어텐션 헤드 가지 치기와 같은 몇 가지 일반적인 메소드를 공유합니다. 또한 모든 모델은 [`torch.nn.Module`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html), [`tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) 또는 [`flax.linen.Module`](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html)의 서브클래스이기도 합니다. 즉, 모델은 각 프레임워크의 사용법과 호환됩니다.
-
-<frameworkcontent>
-<pt>
-사용자 지정 configuration 속성을 모델에 가져옵니다:
-
-```py
->>> from transformers import DistilBertModel
-
->>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/config.json")
->>> model = DistilBertModel(my_config)
-```
-
-이제 사전 학습된 가중치 대신 임의의 값을 가진 모델이 생성됩니다. 이 모델을 훈련하기 전까지는 유용하게 사용할 수 없습니다. 훈련은 비용과 시간이 많이 소요되는 프로세스입니다. 일반적으로 훈련에 필요한 리소스의 일부만 사용하면서 더 나은 결과를 더 빨리 얻으려면 사전 훈련된 모델을 사용하는 것이 좋습니다.
-
-사전 학습된 모델을 [`~PreTrainedModel.from_pretrained`]로 생성합니다:
-
-```py
->>> model = DistilBertModel.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-🤗 Transformers에서 제공한 모델의 사전 학습된 가중치를 사용하는 경우 기본 모델 configuration을 자동으로 불러옵니다. 그러나 원하는 경우 기본 모델 configuration 속성의 일부 또는 전부를 사용자 지정으로 바꿀 수 있습니다:
-
-```py
->>> model = DistilBertModel.from_pretrained("distilbert/distilbert-base-uncased", config=my_config)
-```
-</pt>
-<tf>
-사용자 지정 configuration 속성을 모델에 불러옵니다:
-
-```py
->>> from transformers import TFDistilBertModel
-
->>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/my_config.json")
->>> tf_model = TFDistilBertModel(my_config)
-```
-
-이제 사전 학습된 가중치 대신 임의의 값을 가진 모델이 생성됩니다. 이 모델을 훈련하기 전까지는 유용하게 사용할 수 없습니다. 훈련은 비용과 시간이 많이 소요되는 프로세스입니다. 일반적으로 훈련에 필요한 리소스의 일부만 사용하면서 더 나은 결과를 더 빨리 얻으려면 사전 훈련된 모델을 사용하는 것이 좋습니다.
-
-사전 학습된 모델을 [`~TFPreTrainedModel.from_pretrained`]로 생성합니다:
-
-```py
->>> tf_model = TFDistilBertModel.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-🤗 Transformers에서 제공한 모델의 사전 학습된 가중치를 사용하는 경우 기본 모델 configuration을 자동으로 불러옵니다. 그러나 원하는 경우 기본 모델 configuration 속성의 일부 또는 전부를 사용자 지정으로 바꿀 수 있습니다:
-
-```py
->>> tf_model = TFDistilBertModel.from_pretrained("distilbert/distilbert-base-uncased", config=my_config)
-```
-</tf>
-</frameworkcontent>
-
-### 모델 헤드[[model-heads]]
-
-이 시점에서 *은닉 상태(hidden state)*를 출력하는 기본 DistilBERT 모델을 갖게 됩니다. 은닉 상태는 최종 출력을 생성하기 위해 모델 헤드에 입력으로 전달됩니다. 🤗 Transformers는 모델이 해당 작업을 지원하는 한 각 작업마다 다른 모델 헤드를 제공합니다(즉, 번역과 같은 시퀀스 간 작업에는 DistilBERT를 사용할 수 없음).
-
-<frameworkcontent>
-<pt>
-예를 들어, [`DistilBertForSequenceClassification`]은 시퀀스 분류 헤드가 있는 기본 DistilBERT 모델입니다. 시퀀스 분류 헤드는 풀링된 출력 위에 있는 선형 레이어입니다.
-
-```py
->>> from transformers import DistilBertForSequenceClassification
-
->>> model = DistilBertForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-다른 모델 헤드로 전환하여 이 체크포인트를 다른 작업에 쉽게 재사용할 수 있습니다. 질의응답 작업의 경우, [`DistilBertForQuestionAnswering`] 모델 헤드를 사용할 수 있습니다. 질의응답 헤드는 숨겨진 상태 출력 위에 선형 레이어가 있다는 점을 제외하면 시퀀스 분류 헤드와 유사합니다.
-
-```py
->>> from transformers import DistilBertForQuestionAnswering
-
->>> model = DistilBertForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
-```
-</pt>
-<tf>
-예를 들어, [`TFDistilBertForSequenceClassification`]은 시퀀스 분류 헤드가 있는 기본 DistilBERT 모델입니다. 시퀀스 분류 헤드는 풀링된 출력 위에 있는 선형 레이어입니다.
-
-```py
->>> from transformers import TFDistilBertForSequenceClassification
-
->>> tf_model = TFDistilBertForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-다른 모델 헤드로 전환하여 이 체크포인트를 다른 작업에 쉽게 재사용할 수 있습니다. 질의응답 작업의 경우, [`TFDistilBertForQuestionAnswering`] 모델 헤드를 사용할 수 있습니다. 질의응답 헤드는 숨겨진 상태 출력 위에 선형 레이어가 있다는 점을 제외하면 시퀀스 분류 헤드와 유사합니다.
-
-```py
->>> from transformers import TFDistilBertForQuestionAnswering
-
->>> tf_model = TFDistilBertForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
-```
-</tf>
-</frameworkcontent>
-
-## 토크나이저[[tokenizer]]
-
-텍스트 데이터에 모델을 사용하기 전에 마지막으로 필요한 기본 클래스는 원시 텍스트를 텐서로 변환하는 [토크나이저](main_classes/tokenizer)입니다. 🤗 Transformers에 사용할 수 있는 토크나이저는 두 가지 유형이 있습니다:
-
- [`PreTrainedTokenizer`]: 파이썬으로 구현된 토크나이저입니다.
- [`PreTrainedTokenizerFast`]: Rust 기반 [🤗 Tokenizer](https://huggingface.co/docs/tokenizers/python/latest/) 라이브러리로 만들어진 토크나이저입니다. 이 토크나이저는 Rust로 구현되어 배치 토큰화에서 특히 빠릅니다. 빠른 토크나이저는 토큰을 원래 단어나 문자에 매핑하는 *오프셋 매핑*과 같은 추가 메소드도 제공합니다.
-두 토크나이저 모두 인코딩 및 디코딩, 새 토큰 추가, 특수 토큰 관리와 같은 일반적인 방법을 지원합니다.
-
-<Tip warning={true}>
-
-모든 모델이 빠른 토크나이저를 지원하는 것은 아닙니다. 이 [표](index#supported-frameworks)에서 모델의 빠른 토크나이저 지원 여부를 확인하세요.
-
-</Tip>
-
-토크나이저를 직접 학습한 경우, *어휘(vocabulary)* 파일에서 토크나이저를 만들 수 있습니다:
-
-```py
->>> from transformers import DistilBertTokenizer
-
->>> my_tokenizer = DistilBertTokenizer(vocab_file="my_vocab_file.txt", do_lower_case=False, padding_side="left")
-```
-
-사용자 지정 토크나이저의 어휘는 사전 학습된 모델의 토크나이저에서 생성된 어휘와 다를 수 있다는 점을 기억하는 것이 중요합니다. 사전 학습된 모델을 사용하는 경우 사전 학습된 모델의 어휘를 사용해야 하며, 그렇지 않으면 입력이 의미를 갖지 못합니다. [`DistilBertTokenizer`] 클래스를 사용하여 사전 학습된 모델의 어휘로 토크나이저를 생성합니다:
-
-```py
->>> from transformers import DistilBertTokenizer
-
->>> slow_tokenizer = DistilBertTokenizer.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-[`DistilBertTokenizerFast`] 클래스로 빠른 토크나이저를 생성합니다:
-
-```py
->>> from transformers import DistilBertTokenizerFast
-
->>> fast_tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-<Tip>
-
-[`AutoTokenizer`]는 기본적으로 빠른 토크나이저를 가져오려고 합니다. 이 동작을 비활성화하려면 `from_pretrained`에서 `use_fast=False`를 설정하면 됩니다.
-
-</Tip>
-
-## 이미지 프로세서[[image-processor]]
-
-이미지 프로세서(image processor)는 비전 입력을 처리합니다. 기본 [`~image_processing_utils.ImageProcessingMixin`] 클래스에서 상속합니다.
-
-사용하려면 사용 중인 모델과 연결된 이미지 프로세서를 생성합니다. 예를 들어, 이미지 분류에 [ViT](model_doc/vit)를 사용하는 경우 기본 [`ViTImageProcessor`]를 생성합니다:
-
-```py
->>> from transformers import ViTImageProcessor
-
->>> vit_extractor = ViTImageProcessor()
->>> print(vit_extractor)
-ViTImageProcessor {
-  "do_normalize": true,
-  "do_resize": true,
-  "feature_extractor_type": "ViTImageProcessor",
-  "image_mean": [
-    0.5,
-    0.5,
-    0.5
-  ],
-  "image_std": [
-    0.5,
-    0.5,
-    0.5
-  ],
-  "resample": 2,
-  "size": 224
-}
-```
-
-<Tip>
-
-사용자 지정을 원하지 않는 경우 `from_pretrained` 메소드를 사용하여 모델의 기본 이미지 프로세서 매개변수를 불러오면 됩니다.
-
-</Tip>
-
-사용자 지정 이미지 프로세서를 생성하려면 [`ViTImageProcessor`] 파라미터를 수정합니다:
-
-```py
->>> from transformers import ViTImageProcessor
-
->>> my_vit_extractor = ViTImageProcessor(resample="PIL.Image.BOX", do_normalize=False, image_mean=[0.3, 0.3, 0.3])
->>> print(my_vit_extractor)
-ViTImageProcessor {
-  "do_normalize": false,
-  "do_resize": true,
-  "feature_extractor_type": "ViTImageProcessor",
-  "image_mean": [
-    0.3,
-    0.3,
-    0.3
-  ],
-  "image_std": [
-    0.5,
-    0.5,
-    0.5
-  ],
-  "resample": "PIL.Image.BOX",
-  "size": 224
-}
-```
-
-## 특성 추출기[[feature-extractor]]
-
-특성 추출기(feature extractor)는 오디오 입력을 처리합니다. 기본 [`~feature_extraction_utils.FeatureExtractionMixin`] 클래스에서 상속되며, 오디오 입력을 처리하기 위해 [`SequenceFeatureExtractor`] 클래스에서 상속할 수도 있습니다.
-
-사용하려면 사용 중인 모델과 연결된 특성 추출기를 생성합니다. 예를 들어, 오디오 분류에 [Wav2Vec2](model_doc/wav2vec2)를 사용하는 경우 기본 [`Wav2Vec2FeatureExtractor`]를 생성합니다:
-
-```py
->>> from transformers import Wav2Vec2FeatureExtractor
-
->>> w2v2_extractor = Wav2Vec2FeatureExtractor()
->>> print(w2v2_extractor)
-Wav2Vec2FeatureExtractor {
-  "do_normalize": true,
-  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
-  "feature_size": 1,
-  "padding_side": "right",
-  "padding_value": 0.0,
-  "return_attention_mask": false,
-  "sampling_rate": 16000
-}
-```
-
-<Tip>
-
-사용자 지정이 필요하지 않은 경우 `from_pretrained` 메소드를 사용하여 모델의 기본 특성 추출기 ㅁ개변수를 불러 오면 됩니다.
-
-</Tip>
-
-사용자 지정 특성 추출기를 만들려면 [`Wav2Vec2FeatureExtractor`] 매개변수를 수정합니다:
-
-```py
->>> from transformers import Wav2Vec2FeatureExtractor
-
->>> w2v2_extractor = Wav2Vec2FeatureExtractor(sampling_rate=8000, do_normalize=False)
->>> print(w2v2_extractor)
-Wav2Vec2FeatureExtractor {
-  "do_normalize": false,
-  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
-  "feature_size": 1,
-  "padding_side": "right",
-  "padding_value": 0.0,
-  "return_attention_mask": false,
-  "sampling_rate": 8000
-}
-```
-
-
-## 프로세서[[processor]]
-
-멀티모달 작업을 지원하는 모델의 경우, 🤗 Transformers는 특성 추출기 및 토크나이저와 같은 처리 클래스를 단일 객체로 편리하게 래핑하는 프로세서 클래스를 제공합니다. 예를 들어, 자동 음성 인식 작업(Automatic Speech Recognition task (ASR))에 [`Wav2Vec2Processor`]를 사용한다고 가정해 보겠습니다. 자동 음성 인식 작업은 오디오를 텍스트로 변환하므로 특성 추출기와 토크나이저가 필요합니다.
-
-오디오 입력을 처리할 특성 추출기를 만듭니다:
-
-```py
->>> from transformers import Wav2Vec2FeatureExtractor
-
->>> feature_extractor = Wav2Vec2FeatureExtractor(padding_value=1.0, do_normalize=True)
-```
-
-텍스트 입력을 처리할 토크나이저를 만듭니다:
-
-```py
->>> from transformers import Wav2Vec2CTCTokenizer
-
->>> tokenizer = Wav2Vec2CTCTokenizer(vocab_file="my_vocab_file.txt")
-```
-
-[`Wav2Vec2Processor`]에서 특성 추출기와 토크나이저를 결합합니다:
-
-```py
->>> from transformers import Wav2Vec2Processor
-
->>> processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)
-```
-
-configuration과 모델이라는 두 가지 기본 클래스와 추가 전처리 클래스(토크나이저, 이미지 프로세서, 특성 추출기 또는 프로세서)를 사용하면 🤗 Transformers에서 지원하는 모든 모델을 만들 수 있습니다. 이러한 각 기본 클래스는 구성이 가능하므로 원하는 특정 속성을 사용할 수 있습니다. 학습을 위해 모델을 쉽게 설정하거나 기존의 사전 학습된 모델을 수정하여 미세 조정할 수 있습니다.
--- a/docs/source/ko/custom_models.md
+++ b/docs/source/ko/custom_models.md
@ -277,7 +277,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
 터미널에서 다음 코드를 실행해 확인할 수 있습니다:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 주피터 노트북의 경우에는 다음과 같습니다:
--- a/docs/source/ko/how_to_hack_models.md
+++ b/docs/source/ko/how_to_hack_models.md
@ -0,0 +1,152 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# 모델 구성 요소 맞춤 설정하기[[customizing-model-components]]
+
+모델을 완전히 새로 작성하는 대신 구성 요소를 수정하여 모델을 맞춤 설정하는 방법이 있습니다. 이 방법으로 모델을 특정 사용 사례에 맞게 모델을 조정할 수 있습니다. 예를 들어, 새로운 레이어를 추가하거나 아키텍처의 어텐션 메커니즘을 최적화할 수 있습니다. 이러한 맞춤 설정은 트랜스포머 모델에 직접 적용되므로, [`Trainer`], [`PreTrainedModel`] 및 [PEFT](https://huggingface.co/docs/peft/en/index) 라이브러리와 같은 기능을 계속 사용할 수 있습니다.
+
+이 가이드에서는 모델의 어텐션 메커니즘을 맞춤 설정하여 [Low-Rank Adaptation (LoRA)](https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora)를 적용하는 방법을 설명합니다.
+
+> [!TIP]
+> 모델 코드를 반복적으로 수정하고 개발할 때 [clear_import_cache](https://github.com/huggingface/transformers/blob/9985d06add07a4cc691dc54a7e34f54205c04d40/src/transformers/utils/import_utils.py#L2286) 유틸리티가 매우 유용합니다. 이 기능은 캐시된 모든 트랜스포머 모듈을 제거하여 Python이 환경을 재시작하지 않고도 수정된 코드를 다시 가져올 수 있도록 합니다.
+>
+> ```py
+> from transformers import AutoModel
+> from transformers.utils.import_utils import clear_import_cache
+>
+> model = AutoModel.from_pretrained("bert-base-uncased")
+> # 모델 코드 수정
+> # 캐시를 지워 수정된 코드를 다시 가져오기
+> clear_import_cache()
+> # 업데이트된 코드를 사용하기 위해 다시 가져오기
+> model = AutoModel.from_pretrained("bert-base-uncased")
+> ```
+
+## 어텐션 클래스[[attention-class]]
+
+[Segment Anything](./model_doc/sam)은 이미지 분할 모델로, 어텐션 메커니즘에서 query-key-value(`qkv`) 프로젝션을 결합합니다. 학습 가능한 파라미터 수와 연산 부담을 줄이기 위해 `qkv` 프로젝션에 LoRA를 적용할 수 있습니다. 이를 위해서는 `qkv` 프로젝션을 분리하여 `q`와 `v`에 LoRA를 개별적으로 적용해야 합니다.
+
+1. 원래의 `SamVisionAttention` 클래스를 상속하여 `SamVisionAttentionSplit`이라는 사용자 정의 어텐션 클래스를 만듭니다. `__init__`에서 결합된 `qkv`를 삭제하고, `q`, `k`, `v`를 위한 개별 선형 레이어를 생성합니다.
+
+```py
+import torch
+import torch.nn as nn
+from transformers.models.sam.modeling_sam import SamVisionAttention
+
+class SamVisionAttentionSplit(SamVisionAttention, nn.Module):
+    def __init__(self, config, window_size):
+        super().__init__(config, window_size)
+        # 결합된 qkv 제거
+        del self.qkv
+        # q, k, v 개별 프로젝션 생성
+        self.q = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
+        self.k = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
+        self.v = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
+        self._register_load_state_dict_pre_hook(self.split_q_k_v_load_hook)
+```
+
+2. `_split_qkv_load_hook` 함수는 모델을 가져올 때, 사전 훈련된 `qkv` 가중치를 `q`, `k`, `v`로 분리하여 사전 훈련된 모델과의 호환성을 보장합니다.
+
+```py
+    def split_q_k_v_load_hook(self, state_dict, prefix, *args):
+        keys_to_delete = []
+        for key in list(state_dict.keys()):
+            if "qkv." in key:
+                # 결합된 프로젝션에서 q, k, v 분리
+                q, k, v = state_dict[key].chunk(3, dim=0)
+                # 개별 q, k, v 프로젝션으로 대체
+                state_dict[key.replace("qkv.", "q.")] = q
+                state_dict[key.replace("qkv.", "k.")] = k
+                state_dict[key.replace("qkv.", "v.")] = v
+                # 기존 qkv 키를 삭제 대상으로 표시
+                keys_to_delete.append(key)
+        
+        # 기존 qkv 키 제거
+        for key in keys_to_delete:
+            del state_dict[key]
+```
+
+3. `forward` 단계에서 `q`, `k`, `v`는 개별적으로 계산되며, 어텐션 메커니즘의 나머지 부분은 동일하게 유지됩니다.
+
+```py
+    def forward(self, hidden_states: torch.Tensor, output_attentions=False) -> torch.Tensor:
+        batch_size, height, width, _ = hidden_states.shape
+        qkv_shapes = (batch_size *  self.num_attention_heads,  height * width, -1)
+        query = self.q(hidden_states).reshape((batch_size,  height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
+        key = self.k(hidden_states).reshape((batch_size,  height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
+        value = self.v(hidden_states).reshape((batch_size,  height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
+
+        attn_weights = (query * self.scale) @ key.transpose(-2, -1)
+
+        attn_weights = torch.nn.functional.softmax(attn_weights, dtype=torch.float32, dim=-1).to(query.dtype)
+        attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)
+        attn_output = (attn_probs @ value).reshape(batch_size, self.num_attention_heads, height, width, -1)
+        attn_output = attn_output.permute(0, 2, 3, 1, 4).reshape(batch_size, height, width, -1)
+        attn_output = self.proj(attn_output)
+
+        if output_attentions:
+            outputs = (attn_output, attn_weights)
+        else:
+            outputs = (attn_output, None)
+        return outputs
+```
+
+사용자 정의 `SamVisionAttentionSplit` 클래스를 원본 모델의 `SamVisionAttention` 모듈에 할당하여 교체합니다. 모델 내 모든 `SamVisionAttention` 인스턴스는 분리된 어텐션 버전으로 대체됩니다.
+
+[`~PreTrainedModel.from_pretrained`]로 모델을 가져오세요.
+
+```py
+from transformers import SamModel
+
+# 사전 훈련된 SAM 모델 가져오기
+model = SamModel.from_pretrained("facebook/sam-vit-base")
+
+# 비전-인코더 모듈에서 어텐션 클래스 교체
+for layer in model.vision_encoder.layers:
+    if hasattr(layer, "attn"):
+        layer.attn = SamVisionAttentionSplit(model.config.vision_config, model.config.vision_config.window_size)
+```
+
+## LoRA[[lora]]
+
+분리된 `q`, `k`, `v` 프로젝션을 사용할 때 , `q`와 `v`에 LoRA를 적용합니다.
+
+[LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig)를 생성하고, 랭크 `r`, `lora_alpha`, `lora_dropout`, `task_type`, 그리고 가장 중요한 적용될 모듈을 지정합니다.
+
+```py
+from peft import LoraConfig, get_peft_model
+
+config = LoraConfig(
+    r=16,
+    lora_alpha=32,
+    # q와 v에 LoRA 적용
+    target_modules=["q", "v"],
+    lora_dropout=0.1,
+    task_type="FEATURE_EXTRACTION"
+)
+```
+
+모델과 [LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig)를 [get\_peft\_model](https://huggingface.co/docs/peft/package_reference/peft_model#peft.get_peft_model)에 전달하여 모델에 LoRA를 적용합니다.
+
+```py
+model = get_peft_model(model, config)
+```
+
+[print_trainable_parameters](https://huggingface.co/docs/peft/package_reference/peft_model#peft.PeftMixedModel.print_trainable_parameters)를 호출하여 전체 파라미터 수 대비 훈련되는 파라미터 수를 확인하세요.
+
+```py
+model.print_trainable_parameters()
+"trainable params: 589,824 || all params: 94,274,096 || trainable%: 0.6256"
+```
--- a/docs/source/ko/main_classes/peft.md
+++ b/docs/source/ko/main_classes/peft.md
@ -0,0 +1,23 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+-->
+
+# PEFT[[transformers.integrations.PeftAdapterMixin]]
+
+[`~integrations.PeftAdapterMixin`]은 Transformers 라이브러리와 함께 어댑터를 관리할 수 있도록 [PEFT](https://huggingface.co/docs/peft/index) 라이브러리의 함수들을 제공합니다. 이 믹스인은 현재 LoRA, IA3, AdaLora를 지원합니다. 프리픽스 튜닝 방법들(프롬프트 튜닝, 프롬프트 학습)은 torch 모듈에 삽입할 수 없는 구조이므로 지원되지 않습니다.
+
+[[autodoc]] integrations.PeftAdapterMixin
+    - load_adapter
+    - add_adapter
+    - set_adapter
+    - disable_adapters
+    - enable_adapters
+    - active_adapters
+    - get_adapter_state_dict
--- a/docs/source/ko/main_classes/tokenizer.md
+++ b/docs/source/ko/main_classes/tokenizer.md
@ -0,0 +1,82 @@
+<!--Copyright 2020 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# 토크나이저[[tokenizer]]
+
+토크나이저는 모델의 입력을 준비하는 역할을 담당합니다. 이 라이브러리에는 모든 모델을 위한 토크나이저가 포함되어 있습니다. 대부분의 토크나이저는 두 가지 버전으로 제공됩니다. 완전한 파이썬 구현과 Rust 라이브러리 [🤗 Tokenizers](https://github.com/huggingface/tokenizers)에 기반한 "Fast" 구현입니다. "Fast" 구현은 다음을 가능하게 합니다:
+
+1. 특히 배치 토큰화를 수행할 때 속도가 크게 향상됩니다.
+2. 원본 문자열(문자 및 단어)과 토큰 공간 사이를 매핑하는 추가적인 메소드를 제공합니다. (예: 특정 문자를 포함하는 토큰의 인덱스를 얻거나, 특정 토큰에 해당하는 문자 범위를 가져오는 등).
+
+기본 클래스인 [`PreTrainedTokenizer`]와 [`PreTrainedTokenizerFast`]는 문자열 입력을 인코딩하는 메소드를 구현하며(아래 참조), 로컬 파일이나 디렉토리, 또는 라이브러리에서 제공하는 사전 훈련된 토크나이저(HuggingFace의 AWS S3 저장소에서 다운로드된)로부터 파이썬 및 "Fast" 토크나이저를 인스턴스화하거나 저장하는 기능을 제공합니다. 이 두 클래스는 공통 메소드를 포함하는 [`~tokenization_utils_base.PreTrainedTokenizerBase`]와 [`~tokenization_utils_base.SpecialTokensMixin`]에 의존합니다.
+
+[`PreTrainedTokenizer`]와 [`PreTrainedTokenizerFast`]는 모든 토크나이저에서 사용되는 주요 메소드들을 구현합니다:
+
+- 토큰화(문자열을 하위 단어 토큰 문자열로 분할), 토큰 문자열을 ID로 변환 및 그 반대 과정, 그리고 인코딩/디코딩(즉, 토큰화 및 정수로 변환)을 수행합니다.
+- 구조(BPE, SentencePiece 등)에 구애받지 않고 어휘에 새로운 토큰을 추가합니다.
+- 특수 토큰(마스크, 문장 시작 등) 관리: 토큰을 추가하고, 쉽게 접근할 수 있도록 토크나이저의 속성에 할당하며, 토큰화 과정에서 분리되지 않도록 보장합니다.
+
+[`BatchEncoding`]은 [`~tokenization_utils_base.PreTrainedTokenizerBase`]의 인코딩 메소드(`__call__`, `encode_plus`, `batch_encode_plus`)의 출력을 담고 있으며, 파이썬 딕셔너리를 상속받습니다. 토크나이저가 순수 파이썬 토크나이저인 경우 이 클래스는 표준 파이썬 딕셔너리처럼 동작하며, 이러한 메소드들로 계산된 다양한 모델 입력(`input_ids`, `attention_mask` 등)을 갖습니다. 토크나이저가 "Fast" 토크나이저일 경우(즉, HuggingFace [tokenizers 라이브러리](https://github.com/huggingface/tokenizers) 기반일 경우), 이 클래스는 추가적으로 원본 문자열(문자 및 단어)과 토큰 공간 사이를 매핑하는 데 사용할 수 있는 여러 고급 정렬 메소드를 제공합니다 (예: 특정 문자를 포함하는 토큰의 인덱스를 얻거나, 특정 토큰에 해당하는 문자 범위를 얻는 등).
+
+
+# 멀티모달 토크나이저[[multimodal-tokenizer]]
+
+그 외에도 각 토크나이저는 "멀티모달" 토크나이저가 될 수 있으며, 이는 토크나이저가 모든 관련 특수 토큰을 토크나이저 속성의 일부로 저장하여 더 쉽게 접근할 수 있도록 한다는 것을 의미합니다. 예를 들어, LLaVA와 같은 비전-언어 모델에서 토크나이저를 가져오면, `tokenizer.image_token_id`에 접근하여 플레이스홀더로 사용되는 특수 이미지 토큰을 얻을 수 있습니다.
+
+모든 유형의 토크나이저에 추가 특수 토큰을 활성화하려면, 다음 코드를 추가하고 토크나이저를 저장해야 합니다. 추가 특수 토큰은 반드시 특정 모달리티와 관련될 필요는 없으며, 모델이 자주 접근해야 하는 어떤 것이든 될 수 있습니다. 아래 코드에서 `output_dir`에 저장된 토크나이저는 세 개의 추가 특수 토큰에 직접 접근할 수 있게 됩니다.
+
+```python
+vision_tokenizer = AutoTokenizer.from_pretrained(
+    "llava-hf/llava-1.5-7b-hf",
+    extra_special_tokens={"image_token": "<image>", "boi_token": "<image_start>", "eoi_token": "<image_end>"}
+)
+print(vision_tokenizer.image_token, vision_tokenizer.image_token_id)
+("<image>", 32000)
+```
+
+## PreTrainedTokenizer[[transformers.PreTrainedTokenizer]]
+
+[[autodoc]] PreTrainedTokenizer
+    - __call__
+    - add_tokens
+    - add_special_tokens
+    - apply_chat_template
+    - batch_decode
+    - decode
+    - encode
+    - push_to_hub
+    - all
+
+
+## PreTrainedTokenizerFast[[transformers.PreTrainedTokenizerFast]]
+
+[`PreTrainedTokenizerFast`]는 [tokenizers](https://huggingface.co/docs/tokenizers) 라이브러리에 의존합니다. 🤗 tokenizers 라이브러리에서 얻은 토크나이저는
+🤗 transformers로 매우 간단하게 가져올 수 있습니다. 어떻게 하는지 알아보려면 [Using tokenizers from 🤗 tokenizers](../fast_tokenizers) 페이지를 참고하세요.
+
+[[autodoc]] PreTrainedTokenizerFast
+    - __call__
+    - add_tokens
+    - add_special_tokens
+    - apply_chat_template
+    - batch_decode
+    - decode
+    - encode
+    - push_to_hub
+    - all
+
+## BatchEncoding[[transformers.BatchEncoding]]
+
+[[autodoc]] BatchEncoding
--- a/docs/source/ko/model_doc/albert.md
+++ b/docs/source/ko/model_doc/albert.md
@ -0,0 +1,263 @@
+<!--Copyright 2020 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+<div style="float: right;">
+    <div class="flex flex-wrap space-x-1">
+        <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white" >
+        <img alt= "TensorFlow" src= "https://img.shields.io/badge/TensorFlow-FF6F00?style=flat&logo=tensorflow&logoColor=white" >
+        <img alt= "Flax" src="https://img.shields.io/badge/Flax-29a79b.svg?style…Nu+W0m6K/I9gGPd/dfx/EN/wN62AhsBWuAAAAAElFTkSuQmCC">
+        <img alt="SDPA" src= "https://img.shields.io/badge/SDPA-DE3412?style=flat&logo=pytorch&logoColor=white" > 
+    </div>
+</div>
+
+# ALBERT[[albert]]
+
+[ALBERT](https://huggingface.co/papers/1909.11942)는 [BERT](./bert)의 확장성과 학습 시 메모리 한계를 해결하기 위해 설계된 모델입니다. 이 모델은 두 가지 파라미터 감소 기법을 도입합니다. 첫 번째는 임베딩 행렬 분해(factorized embedding parametrization)로, 큰 어휘 임베딩 행렬을 두 개의 작은 행렬로 분해하여 히든 사이즈를 늘려도 파라미터 수가 크게 증가하지 않도록 합니다. 두 번째는 계층 간 파라미터 공유(cross-layer parameter sharing)로, 여러 계층이 파라미터를 공유하여 학습해야 할 파라미터 수를 줄입니다.
+
+ALBERT는 BERT에서 발생하는 GPU/TPU 메모리 한계, 긴 학습 시간, 갑작스런 성능 저하 문제를 해결하기 위해 만들어졌습니다. ALBERT는 파라미터를 줄이기 위해 두 가지 기법을 사용하여 메모리 사용량을 줄이고 BERT의 학습 속도를 높입니다:
+
+- **임베딩 행렬 분해:** 큰 어휘 임베딩 행렬을 두 개의 더 작은 행렬로 분해하여 메모리 사용량을 줄입니다.
+- **계층 간 파라미터 공유:** 각 트랜스포머 계층마다 별도의 파라미터를 학습하는 대신, 여러 계층이 파라미터를 공유하여 학습해야 할 가중치 수를 더욱 줄입니다.
+
+ALBERT는 BERT와 마찬가지로 절대 위치 임베딩(absolute position embeddings)을 사용하므로, 입력 패딩은 오른쪽에 적용해야 합니다. 임베딩 크기는 128이며, BERT의 768보다 작습니다. ALBERT는 한 번에 최대 512개의 토큰을 처리할 수 있습니다.
+
+모든 공식 ALBERT 체크포인트는 [ALBERT 커뮤니티](https://huggingface.co/albert) 조직에서 확인하실 수 있습니다.
+
+> [!TIP]
+> 오른쪽 사이드바의 ALBERT 모델을 클릭하시면 다양한 언어 작업에 ALBERT를 적용하는 예시를 더 확인하실 수 있습니다.
+
+아래 예시는 [`Pipeline`], [`AutoModel`] 그리고 커맨드라인에서 `[MASK]` 토큰을 예측하는 방법을 보여줍니다.
+
+<hfoptions id="usage">
+<hfoption id="Pipeline">
+
+```py
+import torch
+from transformers import pipeline
+
+pipeline = pipeline(
+    task="fill-mask",
+    model="albert-base-v2",
+    torch_dtype=torch.float16,
+    device=0
+)
+pipeline("식물은 광합성이라고 알려진 과정을 통해 [MASK]를 생성합니다.", top_k=5)
+```
+
+</hfoption>
+<hfoption id="AutoModel">
+
+```py
+import torch
+from transformers import AutoModelForMaskedLM, AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("albert/albert-base-v2")
+model = AutoModelForMaskedLM.from_pretrained(
+    "albert/albert-base-v2",
+    torch_dtype=torch.float16,
+    attn_implementation="sdpa",
+    device_map="auto"
+)
+
+prompt = "식물은 [MASK]이라고 알려진 과정을 통해 에너지를 생성합니다."
+inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+with torch.no_grad():
+    outputs = model(**inputs)
+    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
+    predictions = outputs.logits[0, mask_token_index]
+
+top_k = torch.topk(predictions, k=5).indices.tolist()
+for token_id in top_k[0]:
+    print(f"예측: {tokenizer.decode([token_id])}")
+```
+
+</hfoption>
+<hfoption id="transformers CLI">
+
+```bash
+echo -e "Plants create [MASK] through a process known as photosynthesis." | transformers run --task fill-mask --model albert-base-v2 --device 0
+```
+
+</hfoption>
+
+</hfoptions>
+
+## 참고 사항[[notes]]
+
+- BERT는 절대 위치 임베딩을 사용하므로, 오른쪽에 입력이 패딩돼야 합니다.
+- 임베딩 크기 `E`는 히든 크기 `H`와 다릅니다. 임베딩은 문맥에 독립적(각 토큰마다 하나의 임베딩 벡터)이고, 은닉 상태는 문맥에 의존적(토큰 시퀀스마다 하나의 은닉 상태)입니다. 임베딩 행렬은 `V x E`(V: 어휘 크기)이므로, 일반적으로 `H >> E`가 더 논리적입니다. `E < H`일 때 모델 파라미터가 더 적어집니다.
+
+## 참고 자료[[resources]]
+
+아래 섹션의 자료들은 공식 Hugging Face 및 커뮤니티(🌎 표시) 자료로, AlBERT를 시작하는 데 도움이 됩니다. 여기에 추가할 자료가 있다면 Pull Request를 보내주세요! 기존 자료와 중복되지 않고 새로운 내용을 담고 있으면 좋습니다.
+
+<PipelineTag pipeline="text-classification"/>
+
+- [`AlbertForSequenceClassification`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/pytorch/text-classification)에서 지원됩니다.
+
+- [`TFAlbertForSequenceClassification`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/text-classification)에서 지원됩니다.
+
+- [`FlaxAlbertForSequenceClassification`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/flax/text-classification)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_flax.ipynb)에서 지원됩니다.
+- [텍스트 분류 작업 가이드](../tasks/sequence_classification)에서 모델 사용법을 확인하세요.
+
+<PipelineTag pipeline="token-classification"/>
+
+- [`AlbertForTokenClassification`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/pytorch/token-classification)에서 지원됩니다.
+
+- [`TFAlbertForTokenClassification`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/token-classification)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification-tf.ipynb)에서 지원됩니다.
+
+- [`FlaxAlbertForTokenClassification`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/flax/token-classification)에서 지원됩니다.
+- 🤗 Hugging Face의 [토큰 분류](https://huggingface.co/course/chapter7/2?fw=pt) 강좌
+- [토큰 분류 작업 가이드](../tasks/token_classification)에서 모델 사용법을 확인하세요.
+
+<PipelineTag pipeline="fill-mask"/>
+
+- [`AlbertForMaskedLM`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/pytorch/language-modeling#robertabertdistilbert-and-masked-language-modeling)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)에서 지원됩니다.
+- [`TFAlbertForMaskedLM`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/language-modeling#run_mlmpy)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling-tf.ipynb)에서 지원됩니다.
+- [`FlaxAlbertForMaskedLM`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/flax/language-modeling#masked-language-modeling)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/masked_language_modeling_flax.ipynb)에서 지원됩니다.
+- 🤗 Hugging Face의 [마스킹 언어 모델링](https://huggingface.co/course/chapter7/3?fw=pt) 강좌
+- [마스킹 언어 모델링 작업 가이드](../tasks/masked_language_modeling)에서 모델 사용법을 확인하세요.
+
+<PipelineTag pipeline="question-answering"/>
+
+- [`AlbertForQuestionAnswering`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/pytorch/question-answering)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)에서 지원됩니다.
+- [`TFAlbertForQuestionAnswering`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/question-answering)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering-tf.ipynb)에서 지원됩니다.
+- [`FlaxAlbertForQuestionAnswering`]은 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/flax/question-answering)에서 지원됩니다.
+- [질의응답](https://huggingface.co/course/chapter7/7?fw=pt) 🤗 Hugging Face 강좌의 챕터.
+- [질의응답 작업 가이드](../tasks/question_answering)에서 모델 사용법을 확인하세요.
+
+**다중 선택(Multiple choice)**
+
+- [`AlbertForMultipleChoice`]는 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/pytorch/multiple-choice)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)에서 지원됩니다.
+- [`TFAlbertForMultipleChoice`]는 이 [예제 스크립트](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/multiple-choice)와 [노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice-tf.ipynb)에서 지원됩니다.
+
+- [다중 선택 작업 가이드](../tasks/multiple_choice)에서 모델 사용법을 확인하세요.
+
+## AlbertConfig[[albertconfig]]
+
+[[autodoc]] AlbertConfig
+
+## AlbertTokenizer[[alberttokenizer]]
+
+[[autodoc]] AlbertTokenizer - build_inputs_with_special_tokens - get_special_tokens_mask - create_token_type_ids_from_sequences - save_vocabulary
+
+## AlbertTokenizerFast[[alberttokenizerfast]]
+
+[[autodoc]] AlbertTokenizerFast
+
+## Albert 특화 출력[[albert-specific-outputs]]
+
+[[autodoc]] models.albert.modeling_albert.AlbertForPreTrainingOutput
+
+[[autodoc]] models.albert.modeling_tf_albert.TFAlbertForPreTrainingOutput
+
+<frameworkcontent>
+<pt>
+
+## AlbertModel[[albertmodel]]
+
+[[autodoc]] AlbertModel - forward
+
+## AlbertForPreTraining[[albertforpretraining]]
+
+[[autodoc]] AlbertForPreTraining - forward
+
+## AlbertForMaskedLM[[albertformaskedlm]]
+
+[[autodoc]] AlbertForMaskedLM - forward
+
+## AlbertForSequenceClassification[[albertforsequenceclassification]]
+
+[[autodoc]] AlbertForSequenceClassification - forward
+
+## AlbertForMultipleChoice[[albertformultiplechoice]]
+
+[[autodoc]] AlbertForMultipleChoice
+
+## AlbertForTokenClassification[[albertfortokenclassification]]
+
+[[autodoc]] AlbertForTokenClassification - forward
+
+## AlbertForQuestionAnswering[[albertforquestionanswering]]
+
+[[autodoc]] AlbertForQuestionAnswering - forward
+
+</pt>
+
+<tf>
+
+## TFAlbertModel[[tfalbertmodel]]
+
+[[autodoc]] TFAlbertModel - call
+
+## TFAlbertForPreTraining[[tfalbertforpretraining]]
+
+[[autodoc]] TFAlbertForPreTraining - call
+
+## TFAlbertForMaskedLM[[tfalbertformaskedlm]]
+
+[[autodoc]] TFAlbertForMaskedLM - call
+
+## TFAlbertForSequenceClassification[[tfalbertforsequenceclassification]]
+
+[[autodoc]] TFAlbertForSequenceClassification - call
+
+## TFAlbertForMultipleChoice[[tfalbertformultiplechoice]]
+
+[[autodoc]] TFAlbertForMultipleChoice - call
+
+## TFAlbertForTokenClassification[[tfalbertfortokenclassification]]
+
+[[autodoc]] TFAlbertForTokenClassification - call
+
+## TFAlbertForQuestionAnswering[[tfalbertforquestionanswering]]
+
+[[autodoc]] TFAlbertForQuestionAnswering - call
+
+</tf>
+<jax>
+
+## FlaxAlbertModel[[flaxalbertmodel]]
+
+[[autodoc]] FlaxAlbertModel - **call**
+
+## FlaxAlbertForPreTraining[[flaxalbertforpretraining]]
+
+[[autodoc]] FlaxAlbertForPreTraining - **call**
+
+## FlaxAlbertForMaskedLM[[flaxalbertformaskedlm]]
+
+[[autodoc]] FlaxAlbertForMaskedLM - **call**
+
+## FlaxAlbertForSequenceClassification[[flaxalbertforsequenceclassification]]
+
+[[autodoc]] FlaxAlbertForSequenceClassification - **call**
+
+## FlaxAlbertForMultipleChoice[[flaxalbertformultiplechoice]]
+
+[[autodoc]] FlaxAlbertForMultipleChoice - **call**
+
+## FlaxAlbertForTokenClassification[[flaxalbertfortokenclassification]]
+
+[[autodoc]] FlaxAlbertForTokenClassification - **call**
+
+## FlaxAlbertForQuestionAnswering[[flaxalbertforquestionanswering]]
+
+[[autodoc]] FlaxAlbertForQuestionAnswering - **call**
+
+</jax>
+</frameworkcontent>
--- a/docs/source/ko/model_doc/exaone4.md
+++ b/docs/source/ko/model_doc/exaone4.md
@ -0,0 +1,207 @@
+<!--Copyright 2025 The LG AI Research and The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# EXAONE 4
+
+## 개요
+
+**[EXAONE 4.0](https://github.com/LG-AI-EXAONE/EXAONE-4.0)** 모델군은 [EXAONE 3.5](https://github.com/LG-AI-EXAONE/EXAONE-3.5) 모델군의 높은 실용성과 [EXAONE Deep](https://github.com/LG-AI-EXAONE/EXAONE-Deep) 모델군의 향상된 사고 추론 능력을 각각 Non-reasoning mode와 Reasoning mode로 통합한 자연어 모델(language model)입니다. 에이전틱(agentic) AI 시대에 발맞춰 EXAONE 4.0은 에이전틱 도구 사용 능력과 같은 핵심 기능을 통합했고, 기존의 다국어 능력을 영어, 한국어와 더불어 스페인어까지 확장했습니다. 
+
+EXAONE 4.0 모델군은 두 개의 모델: 높은 성능을 위해 최적화된 32B 중형 모델, 그리고 온-디바이스 활용을 위해 디자인된 1.2B 소형 모델으로 구성되어 있습니다.
+
+EXAONE 4.0의 모델 구조는 이전 EXAONE 모델들과 다른 아키텍처 디자인을 채택했습니다.
+
+1. **Hybrid Attention**: 32B 모델은 *Local attention (sliding window attention)*과 *Global attention (full attention)*을 3:1 비율로 연결한 hybrid attention 구조를 채택했습니다. 또한 전체 문맥을 더 잘 이해할 수 있도록 global attention에서 RoPE를 사용하지 않았습니다.
+2. **QK-Reorder-Norm**: 더 나은 downstream tasks 성능을 위해 연산량의 증가를 감수하며 전통적으로 사용되고 있던 Pre-LN 방식을 변경했습니다. LayerNorm의 위치를 attention과 MLP의 출력에 적용되도록 재배치했고, Q와 K projection 직후에도 RMS normalization을 추가했습니다. 
+
+더 자세한 정보는 [기술 보고서](https://arxiv.org/abs/2507.11407), [HuggingFace 논문](https://huggingface.co/papers/2507.11407), [블로그](https://www.lgresearch.ai/blog/view?seq=576), [공식 GitHub](https://github.com/LG-AI-EXAONE/EXAONE-4.0) 페이지를 참고해주시길 바랍니다.
+
+공개된 모든 모델 체크포인트는 [HuggingFace 콜렉션](https://huggingface.co/collections/LGAI-EXAONE/exaone-40-686b2e0069800c835ed48375)에서 확인할 수 있습니다.
+
+
+## 모델 세부 정보
+
+| Model Configuration | 32B | 1.2B |
+|:-------------------|:-----:|:------:|
+| d_model | 5,120 | 2,048 |
+| Number of layers | 64 | 30 |
+| Normalization | QK-Reorder-LN | QK-Reorder-LN |
+| Non-linearity | SwiGLU | SwiGLU |
+| Feedforward dimension | 27,392 | 4,096 |
+| Attention type | Hybrid (3:1 Local-Global) | Global |
+| Head type | GQA | GQA |
+| Number of heads | 40 | 32 |
+| Number of KV heads | 8 | 8 |
+| Head size | 128 | 64 |
+| Max sequence length | 131,072 | 65,536 |
+| RoPE theta | 1,000,000 | 1,000,000 |
+| Tokenizer | BBPE | BBPE |
+| Vocab size | 102,400 | 102,400 |
+| Tied word embedding | False | True |
+| Knowledge cut-off | Nov. 2024 | Nov. 2024 |
+
+
+## 사용 팁
+
+### Non-reasoning mode
+
+일반적인 대화의 경우 아래 예제와 같이 EXAONE 4.0을 사용할 수 있습니다.
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "LGAI-EXAONE/EXAONE-4.0-32B"
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="bfloat16",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+# 원하는 입력을 선택하세요
+prompt = "Explain how wonderful you are"
+prompt = "Explica lo increíble que eres"
+prompt = "너가 얼마나 대단한지 설명해 봐"
+
+messages = [
+    {"role": "user", "content": prompt}
+]
+input_ids = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt"
+)
+
+output = model.generate(
+    input_ids.to(model.device),
+    max_new_tokens=128,
+    do_sample=False,
+)
+print(tokenizer.decode(output[0]))
+```
+
+### Reasoning mode
+
+The EXAONE 4.0 models have reasoning capabilities for handling complex problems. You can activate reasoning mode by using the `enable_thinking=True` argument with the tokenizer, which opens a reasoning block that starts with `<think>` tag without closing it.
+
+EXAONE 4.0 모델군은 복잡한 문제를 해결하기 위한 사고 추론 능력을 갖추고 있습니다. 토크나이저에서 `enable_thinking=True` 인자를 사용해서 reasoning mode로 모델을 사용할 수 있습니다. 이 경우 `<think>` 토큰으로 추론 블록을 연 뒤, 닫지 않고 추론을 시작합니다.
+
+```python
+messages = [
+    {"role": "user", "content": "Which one is bigger, 3.12 vs 3.9?"}
+]
+input_ids = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt",
+    enable_thinking=True,
+)
+
+output = model.generate(
+    input_ids.to(model.device),
+    max_new_tokens=128,
+    do_sample=True,
+    temperature=0.6,
+    top_p=0.95
+)
+print(tokenizer.decode(output[0]))
+```
+
+> [!IMPORTANT]
+> 모델을 reasoning mode로 사용할 경우, 생성되는 답변이 sampling parameters에 굉장히 민감합니다. 따라서 더 나은 생성 품질을 위해 공식 [Usage Guideline](https://github.com/LG-AI-EXAONE/EXAONE-4.0#usage-guideline)를 참조해 주시길 바랍니다.
+
+### Agentic tool use
+
+EXAONE 4.0 모델은 도구 사용 능력을 갖춘 덕분에 Agent로 사용할 수 있습니다. 이를 위해서는 아래 예제와 같이 도구 명세를 모델에게 제공해 주어야 합니다.
+
+```python
+import random
+
+def roll_dice(max_num: int):
+    return random.randint(1, max_num)
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "roll_dice",
+            "description": "Roll a dice with the number 1 to N. User can select the number N.",
+            "parameters": {
+                "type": "object",
+                "required": ["max_num"],
+                "properties": {
+                    "max_num": {
+                        "type": "int",
+                        "description": "Max number of the dice"
+                    }
+                }
+            }
+        }
+    }
+]
+
+messages = [
+    {"role": "user", "content": "Roll D6 dice twice!"}
+]
+input_ids = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt",
+    tools=tools,
+)
+
+output = model.generate(
+    input_ids.to(model.device),
+    max_new_tokens=1024,
+    do_sample=True,
+    temperature=0.6,
+    top_p=0.95,
+)
+print(tokenizer.decode(output[0]))
+```
+
+## Exaone4Config
+
+[[autodoc]] Exaone4Config
+
+## Exaone4Model
+
+[[autodoc]] Exaone4Model
+    - forward
+
+## Exaone4ForCausalLM
+
+[[autodoc]] Exaone4ForCausalLM
+    - forward
+
+## Exaone4ForSequenceClassification
+
+[[autodoc]] Exaone4ForSequenceClassification
+    - forward
+
+## Exaone4ForTokenClassification
+
+[[autodoc]] Exaone4ForTokenClassification
+    - forward
+
+## Exaone4ForQuestionAnswering
+
+[[autodoc]] Exaone4ForQuestionAnswering
+    - forward
--- a/docs/source/ko/model_doc/tvp.md
+++ b/docs/source/ko/model_doc/tvp.md
@ -0,0 +1,189 @@
+<!--Copyright 2023 The Intel Team Authors and HuggingFace Inc. team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# TVP [[tvp]]
+
+<div class="flex flex-wrap space-x-1">
+<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+</div>
+
+## 개요 [[overview]]
+
+Text-Visual Prompting(TVP) 프레임워크는 Yimeng Zhang, Xin Chen, Jinghan Jia, Sijia Liu, Ke Ding이 발표한 논문 [Text-Visual Prompting for Efficient 2D Temporal Video Grounding](https://huggingface.co/papers/2303.04995)에서 제안되었습니다.
+
+논문의 초록은 다음과 같습니다:
+
+*본 논문에서는 길고, 편집되지 않은 비디오에서 문장으로 설명된 순간의 시작/종료 시점을 예측하는 것을 목표로 하는 Temporal Video Grounding(TVG) 문제를 다룹니다. 세밀한 3D 시각적 특징 덕분에 TVG 기술은 최근 몇 년 동안 놀라운 발전을 이뤘습니다. 하지만 3D 합성곱 신경망(CNN)의 높은 복잡성으로 인해 밀도 높은 3D 시각적 특징을 추출하는 데 시간이 오래 걸리고 그만큼 많은 메모리와 연산 자원을 필요로 합니다. 효율적인 TVG를 위해, 본 논문에서는 TVG 모델의 시각적 입력과 텍스트 특징 모두에 최적화된 교란 패턴('프롬프트'라고 부름)을 통합하는 새로운 Text-Visual Prompting(TVP) 프레임워크를 제안합니다. 3D CNN과 뚜렷이 대비되게 TVP가 2D TVG 모델에서 비전 인코더와 언어 인코더를 효과적으로 공동 학습할 수 있게 하고, 낮은 복잡도의 희소한 2D 시각적 특징만을 사용하여 크로스 모달 특징 융합의 성능을 향상시킵니다. 더 나아가, TVG의 효율적인 학습을 위해 Temporal-Distance IoU(TDIoU) 손실 함수를 제안합니다. 두 개의 벤치마크 데이터 세트인 Charades-STA와 ActivityNet Captions 데이터셋에 대한 실험을 통해, 제안된 TVP가 2D TVG의 성능을 크게 향상시키고(예: Charades-STA에서 9.79% 향상, ActivityNet Captions에서 30.77% 향상) 3D 시각적 특징을 사용하는 TVG에 비해 5배의 추론 가속을 달성함을 실험적으로 입증합니다.*
+
+이 연구는 Temporal Video Grounding(TVG)을 다룹니다. TVG는 문장으로 설명된 특정 이벤트의 시작 및 종료 시점을 긴 비디오에서 정확히 찾아내는 과정입니다. TVG 성능을 향상시키기 위해 Text-Visual Prompting(TVP)이 제안되었습니다. TVP는 '프롬프트'라고 알려진 특별히 설계된 패턴을 TVG 모델의 시각적(이미지 기반) 및 텍스트(단어 기반) 입력 구성 요소 모두에 통합하는 것을 방식입니다. 이 프롬프트는 추가적인 시공간적 컨텍스트를 제공함으로써 모델이 비디오 내 이벤트 시점의 예측 정확도를 높입니다. 이 접근 방식은 3D 시각적 입력 대신 2D 입력을 사용합니다. 3D 입력은 보다 풍부한 시공간적 세부 정보를 제공하지만 처리하는 데 시간이 더 많이 걸립니다. 따라서 프롬프팅 메소드와 함께 2D 입력을 사용하여 이와 유사한 수준의 컨텍스트와 정확도를 더 효율적으로 제공하는 것을 목표로 합니다.
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/tvp_architecture.png"
+alt="drawing" width="600"/>
+
+<small> TVP 아키텍처. <a href="https://huggingface.co/papers/2303.04995">원본 논문에서 발췌.</a> </small>
+
+이 모델은 [Jiqing Feng](https://huggingface.co/Jiqing)님이 기여했습니다. 원본 코드는 [이 곳](https://github.com/intel/TVP)에서 찾을 수 있습니다.
+
+## 사용 팁 및 예시 [[usage-tips-and-examples]]
+
+프롬프트는 최적화된 교란 패턴으로 입력 비디오 프레임이나 텍스트 특징에 추가되는 패턴입니다. 범용 세트란 모든 입력에 대해 동일한 프롬프트 세트를 사용하는 것을 말합니다. 즉, 입력 내용과 관계없이 모든 비디오 프레임과 텍스트 특징에 이 프롬프트들을 일관적으로 추가합니다.
+
+TVP는 시각 인코더와 크로스 모달 인코더로 구성됩니다. 범용 시각 프롬프트와 텍스트 프롬프트 세트가 각각 샘플링된 비디오 프레임과 텍스트 특징에 통합됩니다. 특히, 서로 다른 시각 프롬프트 세트가 편집되지 않은 한 비디오에서 균일하게 샘플링된 프레임에 순서대로 적용됩니다.
+
+이 모델의 목표는 학습 가능한 프롬프트를 시각적 입력과 텍스트 특징 모두에 통합하여 Temporal Video Grounding(TVG) 문제를 해결하는 것입니다.
+
+원칙적으로, 제안된 아키텍처에는 어떤 시각 인코더나 크로스 모달 인코더라도 적용할 수 있습니다.
+
+[TvpProcessor]는 [BertTokenizer]와 [TvpImageProcessor]를 단일 인스턴스로 래핑하여 텍스트를 인코딩하고 이미지를 각각 준비합니다.
+
+다음 예시는 [TvpProcessor]와 [TvpForVideoGrounding]을 사용하여 TVG를 실행하는 방법을 보여줍니다.
+
+```python
+import av
+import cv2
+import numpy as np
+import torch
+from huggingface_hub import hf_hub_download
+from transformers import AutoProcessor, TvpForVideoGrounding
+
+
+def pyav_decode(container, sampling_rate, num_frames, clip_idx, num_clips, target_fps):
+    '''
+    원본 fps의 비디오를 지정한 fps(target_fps)로 변환하고 PyAV 디코더로 비디오를 디코딩합니다.
+    Args:
+        container (container): pyav 컨테이너 객체입니다.
+        sampling_rate (int): 프레임 샘플링 속도입니다.(샘플링된 두개의 프레임 사이의 간격을 말합니다)
+        num_frames (int): 샘플링할 프레임 수입니다.
+        clip_idx (int): clip_idx가 -1이면 시간 축에서 무작위 샘플링을 수행합니다.
+            clip_idx가 -1보다 크면 비디오를 num_clips 개로 균등 분할한 후
+            clip_idx번째 비디오 클립을 선택합니다.
+        num_clips (int): 주어진 비디오에서 균일하게 샘플링할 전체 클립 수입니다.
+        target_fps (int): 입력 비디오의 fps가 다를 수 있으므로, 샘플링 전에
+            지정한 fps로 변환합니다
+    Returns:
+        frames (tensor): 비디오에서 디코딩된 프레임입니다. 비디오 스트림을 찾을 수 없는 경우
+            None을 반환합니다.
+        fps (float): 비디오의 초당 프레임 수입니다.
+    '''
+    video = container.streams.video[0]
+    fps = float(video.average_rate)
+    clip_size = sampling_rate * num_frames / target_fps * fps
+    delta = max(num_frames - clip_size, 0)
+    start_idx = delta * clip_idx / num_clips
+    end_idx = start_idx + clip_size - 1
+    timebase = video.duration / num_frames
+    video_start_pts = int(start_idx * timebase)
+    video_end_pts = int(end_idx * timebase)
+    seek_offset = max(video_start_pts - 1024, 0)
+    container.seek(seek_offset, any_frame=False, backward=True, stream=video)
+    frames = {}
+    for frame in container.decode(video=0):
+        if frame.pts < video_start_pts:
+            continue
+        frames[frame.pts] = frame
+        if frame.pts > video_end_pts:
+            break
+    frames = [frames[pts] for pts in sorted(frames)]
+    return frames, fps
+
+
+def decode(container, sampling_rate, num_frames, clip_idx, num_clips, target_fps):
+    '''
+    비디오를 디코딩하고 시간 축 샘플링을 수행합니다.
+    Args:
+        container (container): pyav 컨테이너 객체입니다.
+        sampling_rate (int): 프레임 샘플링 속도입니다.(샘플링된 두개의 프레임 사이의 간격을 말합니다)
+        num_frames (int): 샘플링할 프레임 수입니다.
+        clip_idx (int): clip_idx가 -1이면 시간 축에서 무작위 샘플링을 수행합니다.
+            clip_idx가 -1보다 크면 비디오를 num_clips 개로 균등 분할한 후
+            clip_idx번째 비디오 클립을 선택합니다.
+        num_clips (int): 주어진 비디오에서 균일하게 샘플링할 전체 클립 수입니다.
+        target_fps (int): 입력 비디오의 fps가 다를 수 있으므로, 샘플링 전에
+            지정한 fps로 변환합니다
+    Returns:
+        frames (tensor): 비디오에서 디코딩된 프레임입니다.
+    '''
+    assert clip_idx >= -2, "Not a valid clip_idx {}".format(clip_idx)
+    frames, fps = pyav_decode(container, sampling_rate, num_frames, clip_idx, num_clips, target_fps)
+    clip_size = sampling_rate * num_frames / target_fps * fps
+    index = np.linspace(0, clip_size - 1, num_frames)
+    index = np.clip(index, 0, len(frames) - 1).astype(np.int64)
+    frames = np.array([frames[idx].to_rgb().to_ndarray() for idx in index])
+    frames = frames.transpose(0, 3, 1, 2)
+    return frames
+
+
+file = hf_hub_download(repo_id="Intel/tvp_demo", filename="AK2KG.mp4", repo_type="dataset")
+model = TvpForVideoGrounding.from_pretrained("Intel/tvp-base")
+
+decoder_kwargs = dict(
+    container=av.open(file, metadata_errors="ignore"),
+    sampling_rate=1,
+    num_frames=model.config.num_frames,
+    clip_idx=0,
+    num_clips=1,
+    target_fps=3,
+)
+raw_sampled_frms = decode(**decoder_kwargs)
+
+text = "a person is sitting on a bed."
+processor = AutoProcessor.from_pretrained("Intel/tvp-base")
+model_inputs = processor(
+    text=[text], videos=list(raw_sampled_frms), return_tensors="pt", max_text_length=100#, size=size
+)
+
+model_inputs["pixel_values"] = model_inputs["pixel_values"].to(model.dtype)
+output = model(**model_inputs)
+
+def get_video_duration(filename):
+    cap = cv2.VideoCapture(filename)
+    if cap.isOpened():
+        rate = cap.get(5)
+        frame_num = cap.get(7)
+        duration = frame_num/rate
+        return duration
+    return -1
+
+duration = get_video_duration(file)
+start, end = processor.post_process_video_grounding(output.logits, duration)
+
+print(f"The time slot of the video corresponding to the text \"{text}\" is from {start}s to {end}s")
+```
+
+팁:
+- 이 TVP 구현은 텍스트 임베딩을 생성하기 위해 [BertTokenizer]를 사용하고, 시각적 임베딩을 계산하기 위해 Resnet-50 모델을 사용합니다.
+- 사전 학습된 [tvp-base](https://huggingface.co/Intel/tvp-base)의 체크포인트가 공개되어 있습니다.
+- 시간적 비디오 그라운딩 작업에 대한 TVP의 성능은 [표 2](https://huggingface.co/papers/2303.04995)를 참고하세요.
+
+## TvpConfig [[transformers.TvpConfig]]
+
+[[autodoc]] TvpConfig
+
+## TvpImageProcessor [[transformers.TvpImageProcessor]]
+
+[[autodoc]] TvpImageProcessor
+    - preprocess
+
+## TvpProcessor [[transformers.TvpProcessor]]
+
+[[autodoc]] TvpProcessor
+    - __call__
+
+## TvpModel [[transformers.TvpModel]]
+
+[[autodoc]] TvpModel
+    - forward
+
+## TvpForVideoGrounding [[transformers.TvpForVideoGrounding]]
+
+[[autodoc]] TvpForVideoGrounding
+    - forward
--- a/docs/source/ko/model_sharing.md
+++ b/docs/source/ko/model_sharing.md
@ -56,7 +56,7 @@ picture-in-picture" allowfullscreen></iframe>
 모델을 허브에 공유하기 전에 Hugging Face 자격 증명이 필요합니다. 터미널에 액세스할 수 있는 경우, 🤗 Transformers가 설치된 가상 환경에서 다음 명령을 실행합니다. 그러면 Hugging Face 캐시 폴더(기본적으로 `~/.cache/`)에 액세스 토큰을 저장합니다:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Jupyter 또는 Colaboratory와 같은 노트북을 사용 중인 경우, [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library) 라이브러리가 설치되었는지 확인하세요. 이 라이브러리를 사용하면 API로 허브와 상호 작용할 수 있습니다.
--- a/docs/source/ko/model_summary.md
+++ b/docs/source/ko/model_summary.md
@ -1,107 +0,0 @@
-<!--Copyright 2020 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# Transformer 모델군[[the-transformer-model-family]]
-
-2017년에 소개된 [기본 Transformer](https://huggingface.co/papers/1706.03762) 모델은 자연어 처리(NLP) 작업을 넘어 새롭고 흥미로운 모델들에 영감을 주었습니다. [단백질 접힘 구조 예측](https://huggingface.co/blog/deep-learning-with-proteins), [치타의 달리기 훈련](https://huggingface.co/blog/train-decision-transformers), [시계열 예측](https://huggingface.co/blog/time-series-transformers) 등을 위한 다양한 모델이 생겨났습니다. Transformer의 변형이 너무 많아서, 큰 그림을 놓치기 쉽습니다. 하지만 여기 있는 모든 모델의 공통점은 기본 Trasnformer 아키텍처를 기반으로 한다는 점입니다. 일부 모델은 인코더 또는 디코더만 사용하고, 다른 모델들은 인코더와 디코더를 모두 사용하기도 합니다. 이렇게 Transformer 모델군 내 상위 레벨에서의 차이점을 분류하고 검토하면 유용한 분류 체계를 얻을 수 있으며, 이전에 접해보지 못한 Transformer 모델들 또한 이해하는 데 도움이 될 것입니다. 
-
-기본 Transformer 모델에 익숙하지 않거나 복습이 필요한 경우, Hugging Face 강의의 [트랜스포머는 어떻게 동작하나요?](https://huggingface.co/course/chapter1/4?fw=pt) 챕터를 확인하세요. 
-
-<div align="center">
-    <iframe width="560" height="315" src="https://www.youtube.com/embed/H39Z_720T5s" title="YouTube video player"
-    frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope;
-    picture-in-picture" allowfullscreen></iframe>
-</div>
-
-## 컴퓨터 비전[[computer-vision]]
-
-<iframe style="border: 1px solid rgba(0, 0, 0, 0.1);" width="1000" height="450" src="https://www.figma.com/embed?embed_host=share&url=https%3A%2F%2Fwww.figma.com%2Ffile%2FacQBpeFBVvrDUlzFlkejoz%2FModelscape-timeline%3Fnode-id%3D0%253A1%26t%3Dm0zJ7m2BQ9oe0WtO-1" allowfullscreen></iframe> 
-
-### 합성곱 네트워크[[convolutional-network]]
-
-[Vision Transformer](https://huggingface.co/papers/2010.11929)가 확장성과 효율성을 입증하기 전까지 오랫동안 합성곱 네트워크(CNN)가 컴퓨터 비전 작업의 지배적인 패러다임이었습니다. 그럼에도 불구하고, 이동 불변성(translation invariance)과 같은 CNN의 우수한 부분이 도드라지기 때문에 몇몇 (특히 특정 과업에서의) Transformer 모델은 아키텍처에 합성곱을 통합하기도 했습니다. [ConvNeXt](model_doc/convnext)는 이런 관례를 뒤집어 CNN을 현대화하기 위해 Transformer의 디자인을 차용합니다. 예를 들면 ConvNeXt는 겹치지 않는 슬라이딩 창(sliding window)을 사용하여 이미지를 패치화하고, 더 큰 커널로 전역 수용 필드(global receptive field)를 확장시킵니다. ConvNeXt는 또한 메모리 효율을 높이고 성능을 향상시키기 위해 여러 레이어 설계를 선택하기 때문에 Transformer와 견줄만합니다!
-
-### 인코더[[cv-encoder]]
-
-[Vision Transformer(ViT)](model_doc/vit)는 합성곱 없는 컴퓨터 비전 작업의 막을 열었습니다. ViT는 표준 Transformer 인코더를 사용하지만, 가장 큰 혁신은 이미지를 처리하는 방식이었습니다. 문장을 토큰으로 분할하는 것처럼 이미지를 고정된 크기의 패치로 분할하고, 이를 사용하여 임베딩을 생성합니다. ViT는 Transformer의 효율적인 아키텍처를 활용하여 훈련에 더 적은 자원을 사용하면서도 당시 CNN에 비견하는 결과를 입증했습니다. 그리고 ViT를 뒤이어 분할(segmentation)과 같은 고밀도 비전 작업과 탐지 작업도 다룰 수 있는 다른 비전 모델이 등장했습니다.
-
-이러한 모델 중 하나가 [Swin](model_doc/swin) Transformer입니다. 이 모델은 작은 크기의 패치에서 계층적 특징 맵(CNN 👀과 같지만 ViT와는 다름)을 만들고 더 깊은 레이어의 인접 패치와 병합합니다. 어텐션(Attention)은 지역 윈도우 내에서만 계산되며, 모델이 더 잘 학습할 수 있도록 어텐션 레이어 간에 윈도우를 이동하며 연결을 생성합니다. Swin Transformer는 계층적 특징 맵을 생성할 수 있으므로, 분할(segmentation)과 탐지와 같은 고밀도 예측 작업에 적합합니다. [SegFormer](model_doc/segformer) 역시 Transformer 인코더를 사용하여 계층적 특징 맵을 구축하지만, 상단에 간단한 다층 퍼셉트론(MLP) 디코더를 추가하여 모든 특징 맵을 결합하고 예측을 수행합니다. 
-
-BeIT와 ViTMAE와 같은 다른 비전 모델은 BERT의 사전훈련 목표(objective)에서 영감을 얻었습니다. [BeIT](model_doc/beit)는 *마스크드 이미지 모델링(MIM)*으로 사전훈련되며, 이미지 패치는 임의로 마스킹되고 이미지도 시각적 토큰으로 토큰화됩니다. BeIT는 마스킹된 패치에 해당하는 시각적 토큰을 예측하도록 학습됩니다. [ViTMAE](model_doc/vitmae)도 비슷한 사전훈련 목표가 있지만, 시각적 토큰 대신 픽셀을 예측해야 한다는 점이 다릅니다. 특이한 점은 이미지 패치의 75%가 마스킹되어 있다는 것입니다! 디코더는 마스킹된 토큰과 인코딩된 패치에서 픽셀을 재구성합니다. 사전훈련이 끝나면 디코더는 폐기되고 인코더는 다운스트림 작업에 사용할 준비가 됩니다.
-
-### 디코더[[cv-decoder]]
-
-대부분의 비전 모델은 인코더에 의존하여 이미지 표현을 학습하기 때문에 디코더 전용 비전 모델은 드뭅니다. 하지만 이미지 생성 등의 사례의 경우, GPT-2와 같은 텍스트 생성 모델에서 보았듯이 디코더가 가장 적합합니다. [ImageGPT](model_doc/imagegpt)는 GPT-2와 동일한 아키텍처를 사용하지만, 시퀀스의 다음 토큰을 예측하는 대신 이미지의 다음 픽셀을 예측합니다. ImageGPT는 이미지 생성 뿐만 아니라 이미지 분류를 위해 미세 조정할 수도 있습니다. 
-
-### 인코더-디코더[[cv-encoder-decoder]]
-
-비전 모델은 일반적으로 인코더(백본으로도 알려짐)를 사용하여 중요한 이미지 특징을 추출한 후, 이를 Transformer 디코더로 전달합니다. [DETR](model_doc/detr)에 사전훈련된 백본이 있지만, 객체 탐지를 위해 완전한 Transformer 인코더-디코더 아키텍처도 사용합니다. 인코더는 이미지 표현을 학습하고 이를 디코더에서 객체 쿼리(각 객체 쿼리는 이미지의 영역 또는 객체에 중점을 두고 학습된 임베딩)와 결합합니다. DETR은 각 객체 쿼리에 대한 바운딩 박스 좌표와 클래스 레이블을 예측합니다.
-
-## 자연어처리[[natural-language-processing]]
-
-<iframe style="border: 1px solid rgba(0, 0, 0, 0.1);" width="1000" height="450" src="https://www.figma.com/embed?embed_host=share&url=https%3A%2F%2Fwww.figma.com%2Ffile%2FUhbQAZDlpYW5XEpdFy6GoG%2Fnlp-model-timeline%3Fnode-id%3D0%253A1%26t%3D4mZMr4r1vDEYGJ50-1" allowfullscreen></iframe>
-
-### 인코더[[nlp-encoder]]
-
-[BERT](model_doc/bert)는 인코더 전용 Transformer로, 다른 토큰을 보고 소위 "부정 행위"를 저지르는 걸 막기 위해 입력에서 특정 토큰을 임의로 마스킹합니다. 사전훈련의 목표는 컨텍스트를 기반으로 마스킹된 토큰을 예측하는 것입니다. 이를 통해 BERT는 왼쪽과 오른쪽 컨텍스트를 충분히 활용하여 입력에 대해 더 깊고 풍부한 표현을 학습할 수 있습니다. 그러나 BERT의 사전훈련 전략에는 여전히 개선의 여지가 남아 있었습니다. [RoBERTa](model_doc/roberta)는 더 긴 시간 동안 더 큰 배치에 대한 훈련을 포함하고, 전처리 중에 한 번만 마스킹하는 것이 아니라 각 에폭에서 토큰을 임의로 마스킹하고, 다음 문장 예측 목표를 제거하는 새로운 사전훈련 방식을 도입함으로써 이를 개선했습니다. 
-
-성능 개선을 위한 전략으로 모델 크기를 키우는 것이 지배적입니다. 하지만 큰 모델을 훈련하려면 계산 비용이 많이 듭니다. 계산 비용을 줄이는 한 가지 방법은 [DistilBERT](model_doc/distilbert)와 같이 작은 모델을 사용하는 것입니다. DistilBERT는 압축 기법인 [지식 증류(knowledge distillation)](https://huggingface.co/papers/1503.02531)를 사용하여, 거의 모든 언어 이해 능력을 유지하면서 더 작은 버전의 BERT를 만듭니다. 
-
-그러나 대부분의 Transformer 모델에 더 많은 매개변수를 사용하는 경향이 이어졌고, 이에 따라 훈련 효율성을 개선하는 것에 중점을 둔 새로운 모델이 등장했습니다. [ALBERT](model_doc/albert)는 두 가지 방법으로 매개변수 수를 줄여 메모리 사용량을 줄였습니다. 바로 큰 어휘를 두 개의 작은 행렬로 분리하는 것과 레이어가 매개변수를 공유하도록 하는 것입니다. [DeBERTa](model_doc/deberta)는 단어와 그 위치를 두 개의 벡터로 개별적으로 인코딩하는 분리된(disentangled) 어텐션 메커니즘을 추가했습니다. 어텐션은 단어와 위치 임베딩을 포함하는 단일 벡터 대신 이 별도의 벡터에서 계산됩니다. [Longformer](model_doc/longformer)는 특히 시퀀스 길이가 긴 문서를 처리할 때, 어텐션을 더 효율적으로 만드는 것에 중점을 두었습니다. 지역(local) 윈도우 어텐션(각 토큰 주변의 고정된 윈도우 크기에서만 계산되는 어텐션)과 전역(global) 어텐션(분류를 위해 `[CLS]`와 같은 특정 작업 토큰에만 해당)의 조합을 사용하여 전체(full) 어텐션 행렬 대신 희소(sparse) 어텐션 행렬을 생성합니다. 
-
-### 디코더[[nlp-decoder]]
-
-[GPT-2](model_doc/gpt2)는 시퀀스에서 다음 단어를 예측하는 디코더 전용 Transformer입니다. 토큰을 오른쪽으로 마스킹하여 모델이 이전 토큰을 보고 "부정 행위"를 하지 못하도록 합니다. GPT-2는 방대한 텍스트에 대해 사전훈련하여 텍스트가 일부만 정확하거나 사실인 경우에도 상당히 능숙하게 텍스트를 생성할 수 있게 되었습니다. 하지만 GPT-2는 BERT가 사전훈련에서 갖는 양방향 컨텍스트가 부족하기 때문에 특정 작업에 적합하지 않았습니다. [XLNET](model_doc/xlnet)은 양방향 훈련이 가능한 permutation language modeling objective(PLM)를 사용하여 BERT와 GPT-2의 사전훈련 목표에 대한 장점을 함께 가지고 있습니다.
-
-GPT-2 이후, 언어 모델은 더욱 거대해졌고 현재는 *대규모 언어 모델(LLM)*로 알려져 있습니다. 충분히 큰 데이터 세트로 사전훈련된 LLM은 퓨샷(few-shot) 또는 제로샷(zero-shot) 학습을 수행합니다. [GPT-J](model_doc/gptj)는 6B 크기의 매개변수가 있고 400B 크기의 토큰으로 훈련된 LLM입니다. GPT-J에 이어 디코더 전용 모델군인 [OPT](model_doc/opt)가 등장했으며, 이 중 가장 큰 모델은 175B 크기이고 180B 크기의 토큰으로 훈련되었습니다. [BLOOM](model_doc/bloom)은 비슷한 시기에 출시되었으며, 이 중 가장 큰 모델은 176B 크기의 매개변수가 있고 46개의 언어와 13개의 프로그래밍 언어로 된 366B 크기의 토큰으로 훈련되었습니다. 
-
-### 인코더-디코더[[nlp-encoder-decoder]]
-
-[BART](model_doc/bart)는 기본 Transformer 아키텍처를 유지하지만, 일부 텍스트 스팬(span)이 단일 `마스크` 토큰으로 대체되는 *text infilling* 변형으로 사전훈련 목표를 수정합니다. 디코더는 변형되지 않은 토큰(향후 토큰은 마스킹됨)을 예측하고 인코더의 은닉 상태를 사용하여 이 작업을 돕습니다. [Pegasus](model_doc/pegasus)는 BART와 유사하지만, Pegasus는 텍스트 스팬 대신 전체 문장을 마스킹합니다. Pegasus는 마스크드 언어 모델링 외에도 gap sentence generation(GSG)로 사전훈련됩니다. GSG는 문서에 중요한 문장 전체를 마스킹하여 `마스크` 토큰으로 대체하는 것을 목표로 합니다. 디코더는 남은 문장에서 출력을 생성해야 합니다. [T5](model_doc/t5)는 특정 접두사를 사용하여 모든 NLP 작업을 텍스트 투 텍스트 문제로 변환하는 더 특수한 모델입니다. 예를 들어, 접두사 `Summarize:`은 요약 작업을 나타냅니다. T5는 지도(GLUE 및 SuperGLUE) 훈련과 자기지도 훈련(토큰의 15%를 임의로 샘플링하여 제거)으로 사전훈련됩니다.
-
-## 오디오[[audio]]
-
-<iframe style="border: 1px solid rgba(0, 0, 0, 0.1);" width="1000" height="450" src="https://www.figma.com/embed?embed_host=share&url=https%3A%2F%2Fwww.figma.com%2Ffile%2Fvrchl8jDV9YwNVPWu2W0kK%2Fspeech-and-audio-model-timeline%3Fnode-id%3D0%253A1%26t%3DmM4H8pPMuK23rClL-1" allowfullscreen></iframe>
-
-### 인코더[[audio-encoder]]
-
-[Wav2Vec2](model_doc/wav2vec2)는 Transformer 인코더를 사용하여 원본 오디오 파형(raw audio waveform)에서 직접 음성 표현을 학습합니다. 허위 음성 표현 세트에서 실제 음성 표현을 판별하는 대조 작업으로 사전훈련됩니다. [HuBERT](model_doc/hubert)는 Wav2Vec2와 유사하지만 훈련 과정이 다릅니다. 타겟 레이블이 유사한 오디오 세그먼트가 클러스터에 할당되어 은닉 단위(unit)가 되는 군집화(clustering) 단계에서 생성됩니다. 은닉 단위는 예측을 위한 임베딩에 매핑됩니다.
-
-### 인코더-디코더[[audio-encoder-decoder]]
-
-[Speech2Text](model_doc/speech_to_text)는 자동 음성 인식(ASR) 및 음성 번역을 위해 고안된 음성 모델입니다. 이 모델은 오디오 파형에서 추출한 log mel-filter bank 특징을 채택하고 자기회귀 방식으로 사전훈련하여, 전사본 또는 번역을 만듭니다. [Whisper](model_doc/whisper)은 ASR 모델이지만, 다른 많은 음성 모델과 달리 제로샷 성능을 위해 대량의 ✨ 레이블이 지정된 ✨ 오디오 전사 데이터에 대해 사전훈련됩니다. 데이터 세트의 큰 묶음에는 영어가 아닌 언어도 포함되어 있어서 자원이 적은 언어에도 Whisper를 사용할 수 있습니다. 구조적으로, Whisper는 Speech2Text와 유사합니다. 오디오 신호는 인코더에 의해 인코딩된 log-mel spectrogram으로 변환됩니다. 디코더는 인코더의 은닉 상태와 이전 토큰으로부터 자기회귀 방식으로 전사를 생성합니다.
-
-## 멀티모달[[multimodal]]
-
-<iframe style="border: 1px solid rgba(0, 0, 0, 0.1);" width="1000" height="450" src="https://www.figma.com/embed?embed_host=share&url=https%3A%2F%2Fwww.figma.com%2Ffile%2FcX125FQHXJS2gxeICiY93p%2Fmultimodal%3Fnode-id%3D0%253A1%26t%3DhPQwdx3HFPWJWnVf-1" allowfullscreen></iframe>
-
-### 인코더[[mm-encoder]]
-
-[VisualBERT](model_doc/visual_bert)는 BERT 이후에 출시된 비전 언어 작업을 위한 멀티모달 모델입니다. 이 모델은 BERT와 사전훈련된 객체 탐지 시스템을 결합하여 이미지 특징을 시각 임베딩으로 추출하고, 텍스트 임베딩과 함께 BERT로 전달합니다. VisualBERT는 마스킹되지 않은 텍스트와 시각 임베딩을 기반으로 마스킹된 텍스트를 예측하고, 텍스트가 이미지와 일치하는지 예측해야 합니다. ViT가 이미지 임베딩을 구하는 방식이 더 쉬웠기 때문에, ViT가 출시된 후 [ViLT](model_doc/vilt)는 아키텍처에 ViT를 채택했습니다. 이미지 임베딩은 텍스트 임베딩과 함께 처리됩니다. 여기에서, ViLT는 이미지 텍스트 매칭, 마스크드 언어 모델링, 전체 단어 마스킹을 통해 사전훈련됩니다.
-
-[CLIP](model_doc/clip)은 다른 접근 방식을 사용하여 (`이미지`, `텍스트`)의 쌍 예측을 수행합니다. (`이미지`, `텍스트`) 쌍에서의 이미지와 텍스트 임베딩 간의 유사도를 최대화하기 위해 4억 개의 (`이미지`, `텍스트`) 쌍 데이터 세트에 대해 이미지 인코더(ViT)와 텍스트 인코더(Transformer)를 함께 훈련합니다. 사전훈련 후, 자연어를 사용하여 이미지가 주어진 텍스트를 예측하거나 그 반대로 예측하도록 CLIP에 지시할 수 있습니다. [OWL-ViT](model_doc/owlvit)는 CLIP을 제로샷 객체 탐지를 위한 백본(backbone)으로 사용하여 CLIP 상에 구축됩니다. 사전훈련 후, 객체 탐지 헤드가 추가되어 (`클래스`, `바운딩 박스`) 쌍에 대한 집합(set) 예측을 수행합니다.
-
-### 인코더-디코더[[mm-encoder-decoder]]
-
-광학 문자 인식(OCR)은 이미지를 이해하고 텍스트를 생성하기 위해 다양한 구성 요소를 필요로 하는 전통적인 텍스트 인식 작업입니다. [TrOCR](model_doc/trocr)은 종단간(end-to-end) Transformer를 사용하여 이 프로세스를 간소화합니다. 인코더는 이미지 이해를 위한 ViT 방식의 모델이며 이미지를 고정된 크기의 패치로 처리합니다. 디코더는 인코더의 은닉 상태를 받아서 자기회귀 방식으로 텍스트를 생성합니다. [Donut](model_doc/donut)은 OCR 기반 접근 방식에 의존하지 않는 더 일반적인 시각 문서 이해 모델입니다. 이 모델은 Swin Transformer를 인코더로, 다국어 BART를 디코더로 사용합니다. Donut은 이미지와 텍스트 주석을 기반으로 다음 단어를 예측하여 텍스트를 읽도록 사전훈련됩니다. 디코더는 프롬프트가 주어지면 토큰 시퀀스를 생성합니다. 프롬프트는 각 다운스트림 작업에 대한 특수 토큰으로 표현됩니다. 예를 들어, 문서 파싱(parsing)에는 인코더의 은닉 상태와 결합되어 문서를 정형 출력 형식(JSON)으로 파싱하는 특수 `파싱` 토큰이 있습니다.
-
-## 강화 학습[[reinforcement-learning]]
-
-<iframe style="border: 1px solid rgba(0, 0, 0, 0.1);" width="1000" height="450" src="https://www.figma.com/embed?embed_host=share&url=https%3A%2F%2Fwww.figma.com%2Ffile%2FiB3Y6RvWYki7ZuKO6tNgZq%2Freinforcement-learning%3Fnode-id%3D0%253A1%26t%3DhPQwdx3HFPWJWnVf-1" allowfullscreen></iframe>
-
-### 디코더[[rl-decoder]]
-
-Decision 및 Trajectory Transformer는 상태(state), 행동(action), 보상(reward)을 시퀀스 모델링 문제로 표현합니다. [Decision Transformer](model_doc/decision_transformer)는 기대 보상(returns-to-go), 과거 상태 및 행동을 기반으로 미래의 원하는 수익(return)으로 이어지는 일련의 행동을 생성합니다. 마지막 *K* 시간 스텝(timestep)에 대해, 세 가지 모달리티는 각각 토큰 임베딩으로 변환되고 GPT와 같은 모델에 의해 처리되어 미래의 액션 토큰을 예측합니다. [Trajectory Transformer](model_doc/trajectory_transformer)도 상태, 행동, 보상을 토큰화하여 GPT 아키텍처로 처리합니다. 보상 조건에 중점을 둔 Decision Transformer와 달리 Trajectory Transformer는 빔 서치(beam search)로 미래 행동을 생성합니다.
--- a/docs/source/ko/multilingual.md
+++ b/docs/source/ko/multilingual.md
@ -1,192 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# 다국어 모델 추론하기[[multilingual-models-for-inference]]
-
-[[open-in-colab]]
-
-🤗 Transformers에는 여러 종류의 다국어(multilingual) 모델이 있으며, 단일 언어(monolingual) 모델과 추론 시 사용법이 다릅니다.
-그렇다고 해서 *모든* 다국어 모델의 사용법이 다른 것은 아닙니다.
-
-[google-bert/bert-base-multilingual-uncased](https://huggingface.co/google-bert/bert-base-multilingual-uncased)와 같은 몇몇 모델은 단일 언어 모델처럼 사용할 수 있습니다.
-이번 가이드에서 다국어 모델의 추론 시 사용 방법을 알아볼 것입니다.
-
-## XLM[[xlm]]
-
-XLM에는 10가지 체크포인트(checkpoint)가 있는데, 이 중 하나만 단일 언어입니다. 
-나머지 체크포인트 9개는 언어 임베딩을 사용하는 체크포인트와 그렇지 않은 체크포인트의 두 가지 범주로 나눌 수 있습니다.
-
-### 언어 임베딩을 사용하는 XLM[[xlm-with-language-embeddings]]
-
-다음 XLM 모델은 추론 시에 언어 임베딩을 사용합니다:
-
- `FacebookAI/xlm-mlm-ende-1024` (마스킹된 언어 모델링, 영어-독일어)
- `FacebookAI/xlm-mlm-enfr-1024` (마스킹된 언어 모델링, 영어-프랑스어)
- `FacebookAI/xlm-mlm-enro-1024` (마스킹된 언어 모델링, 영어-루마니아어)
- `FacebookAI/xlm-mlm-xnli15-1024` (마스킹된 언어 모델링, XNLI 데이터 세트에서 제공하는 15개 국어)
- `FacebookAI/xlm-mlm-tlm-xnli15-1024` (마스킹된 언어 모델링 + 번역, XNLI 데이터 세트에서 제공하는 15개 국어)
- `FacebookAI/xlm-clm-enfr-1024` (Causal language modeling, 영어-프랑스어)
- `FacebookAI/xlm-clm-ende-1024` (Causal language modeling, 영어-독일어)
-
-언어 임베딩은 모델에 전달된 `input_ids`와 동일한 shape의 텐서로 표현됩니다.
-이러한 텐서의 값은 사용된 언어에 따라 다르며 토크나이저의 `lang2id` 및 `id2lang` 속성에 의해 식별됩니다.
-
-다음 예제에서는 `FacebookAI/xlm-clm-enfr-1024` 체크포인트(코잘 언어 모델링(causal language modeling), 영어-프랑스어)를 가져옵니다:
-
-```py
->>> import torch
->>> from transformers import XLMTokenizer, XLMWithLMHeadModel
-
->>> tokenizer = XLMTokenizer.from_pretrained("FacebookAI/xlm-clm-enfr-1024")
->>> model = XLMWithLMHeadModel.from_pretrained("FacebookAI/xlm-clm-enfr-1024")
-```
-
-토크나이저의 `lang2id` 속성은 모델의 언어와 해당 ID를 표시합니다:
-
-```py
->>> print(tokenizer.lang2id)
-{'en': 0, 'fr': 1}
-```
-
-다음으로, 예제 입력을 만듭니다:
-
-```py
->>> input_ids = torch.tensor([tokenizer.encode("Wikipedia was used to")])  # 배치 크기는 1입니다
-```
-
-언어 ID를 `"en"`으로 설정해 언어 임베딩을 정의합니다. 
-언어 임베딩은 영어의 언어 ID인 `0`으로 채워진 텐서입니다.
-이 텐서는 `input_ids`와 같은 크기여야 합니다. 
-
-```py
->>> language_id = tokenizer.lang2id["en"]  # 0
->>> langs = torch.tensor([language_id] * input_ids.shape[1])  # torch.tensor([0, 0, 0, ..., 0])
-
->>> # (batch_size, sequence_length) shape의 텐서가 되도록 만듭니다.
->>> langs = langs.view(1, -1)  # 이제 [1, sequence_length] shape이 되었습니다(배치 크기는 1입니다)
-```
-
-이제 `input_ids`와 언어 임베딩을 모델로 전달합니다:
-
-```py
->>> outputs = model(input_ids, langs=langs)
-```
-
-[run_generation.py](https://github.com/huggingface/transformers/tree/main/examples/pytorch/text-generation/run_generation.py) 스크립트로 `xlm-clm` 체크포인트를 사용해 텍스트와 언어 임베딩을 생성할 수 있습니다.
-
-### 언어 임베딩을 사용하지 않는 XLM[[xlm-without-language-embeddings]]
-
-다음 XLM 모델은 추론 시에 언어 임베딩이 필요하지 않습니다:
-
- `FacebookAI/xlm-mlm-17-1280` (마스킹된 언어 모델링, 17개 국어)
- `FacebookAI/xlm-mlm-100-1280` (마스킹된 언어 모델링, 100개 국어)
-
-이전의 XLM 체크포인트와 달리 이 모델은 일반 문장 표현에 사용됩니다.
-
-## BERT[[bert]]
-
-다음 BERT 모델은 다국어 태스크에 사용할 수 있습니다:
-
- `google-bert/bert-base-multilingual-uncased` (마스킹된 언어 모델링 + 다음 문장 예측, 102개 국어)
- `google-bert/bert-base-multilingual-cased` (마스킹된 언어 모델링 + 다음 문장 예측, 104개 국어)
-
-이러한 모델은 추론 시에 언어 임베딩이 필요하지 않습니다. 
-문맥에서 언어를 식별하고, 식별된 언어로 추론합니다.
-
-## XLM-RoBERTa[[xlmroberta]]
-
-다음 XLM-RoBERTa 또한 다국어 다국어 태스크에 사용할 수 있습니다:
-
- `FacebookAI/xlm-roberta-base` (마스킹된 언어 모델링, 100개 국어)
- `FacebookAI/xlm-roberta-large` (마스킹된 언어 모델링, 100개 국어)
-
-XLM-RoBERTa는 100개 국어에 대해 새로 생성되고 정제된 2.5TB 규모의 CommonCrawl 데이터로 학습되었습니다.
-이전에 공개된 mBERT나 XLM과 같은 다국어 모델에 비해 분류, 시퀀스 라벨링, 질의 응답과 같은 다운스트림(downstream) 작업에서 이점이 있습니다.
-
-## M2M100[[m2m100]]
-
-다음 M2M100 모델 또한 다국어 다국어 태스크에 사용할 수 있습니다:
-
- `facebook/m2m100_418M` (번역)
- `facebook/m2m100_1.2B` (번역)
-
-이 예제에서는 `facebook/m2m100_418M` 체크포인트를 가져와서 중국어를 영어로 번역합니다. 
-토크나이저에서 번역 대상 언어(source language)를 설정할 수 있습니다:
-
-```py
->>> from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
-
->>> en_text = "Do not meddle in the affairs of wizards, for they are subtle and quick to anger."
->>> chinese_text = "不要插手巫師的事務, 因為他們是微妙的, 很快就會發怒."
-
->>> tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M", src_lang="zh")
->>> model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
-```
-
-문장을 토큰화합니다:
-
-```py
->>> encoded_zh = tokenizer(chinese_text, return_tensors="pt")
-```
-
-M2M100은 번역을 진행하기 위해 첫 번째로 생성되는 토큰은 번역할 언어(target language) ID로 강제 지정합니다.
-영어로 번역하기 위해 `generate` 메소드에서 `forced_bos_token_id`를 `en`으로 설정합니다:
-
-```py
->>> generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en"))
->>> tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
-'Do not interfere with the matters of the witches, because they are delicate and will soon be angry.'
-```
-
-## MBart[[mbart]]
-
-다음 MBart 모델 또한 다국어 태스크에 사용할 수 있습니다:
-
- `facebook/mbart-large-50-one-to-many-mmt` (일대다 다국어 번역, 50개 국어)
- `facebook/mbart-large-50-many-to-many-mmt` (다대다 다국어 번역, 50개 국어)
- `facebook/mbart-large-50-many-to-one-mmt` (다대일 다국어 번역, 50개 국어)
- `facebook/mbart-large-50` (다국어 번역, 50개 국어)
- `facebook/mbart-large-cc25`
-
-이 예제에서는 핀란드어를 영어로 번역하기 위해 `facebook/mbart-large-50-many-to-many-mmt` 체크포인트를 가져옵니다. 
-토크나이저에서 번역 대상 언어(source language)를 설정할 수 있습니다:
-
-```py
->>> from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-
->>> en_text = "Do not meddle in the affairs of wizards, for they are subtle and quick to anger."
->>> fi_text = "Älä sekaannu velhojen asioihin, sillä ne ovat hienovaraisia ja nopeasti vihaisia."
-
->>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-50-many-to-many-mmt", src_lang="fi_FI")
->>> model = AutoModelForSeq2SeqLM.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
-```
-
-문장을 토큰화합니다:
-
-```py
->>> encoded_en = tokenizer(en_text, return_tensors="pt")
-```
-
-MBart는 번역을 진행하기 위해 첫 번째로 생성되는 토큰은 번역할 언어(target language) ID로 강제 지정합니다.
-영어로 번역하기 위해 `generate` 메소드에서 `forced_bos_token_id`를 `en`으로 설정합니다:
-
-```py
->>> generated_tokens = model.generate(**encoded_en, forced_bos_token_id=tokenizer.lang_code_to_id("en_XX"))
->>> tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
-"Don't interfere with the wizard's affairs, because they are subtle, will soon get angry."
-```
-
-`facebook/mbart-large-50-many-to-one-mmt` 체크포인트를 사용하고 있다면, 첫 번째로 생성되는 토큰을 번역할 언어(target language) ID로 강제 지정할 필요는 없습니다.
--- a/docs/source/ko/perf_train_gpu_one.md
+++ b/docs/source/ko/perf_train_gpu_one.md
@ -0,0 +1,294 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# GPU[[gpu]]
+
+GPU는 높은 메모리 대역폭과 병렬 처리 능력 덕분에 딥러닝 모델 학습에 널리 사용됩니다. GPU 사양과 모델 크기에 따라 수십억 개 매개변수를 가진 모델도 학습할 수 있습니다. 핵심은 GPU 메모리 활용도(데이터 처리량/학습 시간)와 학습 속도 사이에서 최적의 균형을 찾는 것입니다.
+
+이 가이드는 Transformers와 PyTorch에서 GPU를 활용해 모델을 효율적으로 학습하기 위해 제공하는 기능을 소개합니다. 대부분의 경우, 이 기능들을 조합해서 학습을 최적화하는 것이 좋습니다.
+
+아래 표를 참고하면 자신의 학습 시나리오에 적합한 기능을 빠르게 파악할 수 있습니다.
+
+| 기능                        | 학습 속도 가속 | 메모리 사용량 절약 |
+| --------------------------- | --------- | ------------- |
+| 배치 크기                   | 예        | 예            |
+| 그레이디언트 누적           | 아니요    | 예            |
+| 그레이디언트 체크포인팅     | 아니요    | 예            |
+| 혼합 정밀도                 | 예        | 조건부        |
+| 옵티마이저                  | 예        | 예            |
+| 데이터 사전 적재            | 예        | 아니요        |
+| torch_empty_cache_steps     | 아니요    | 예            |
+| torch.compile               | 예        | 아니요        |
+| 스케일된 내적 어텐션 (SDPA) | 예        | 예            |
+
+## Trainer[[trainer]]
+
+Trainer는 [`TrainingArguments`]로 설정할 수 있는 다양한 학습 기능을 제공합니다. 이번 섹션에서는 학습 최적화에 특히 유용한 주요 기능 몇 가지를 살펴봅니다.
+
+### 배치 크기[[batch-size]]
+
+배치 크기는 GPU 학습 효율을 좌우하는 가장 중요한 하이퍼파라미터 중 하나로, 메모리 사용량과 학습 속도에 직접적인 영향을 줍니다. 배치 크기를 크게 하면 GPU의 병렬 처리 능력을 극대화하여 학습 속도를 높일 수 있습니다. 일반적으로 8, 64, 128, 256, 512처럼 2의 거듭제곱 값을 사용하는 것이 좋습니다. 적절한 배치 크기는 GPU 사양과 모델의 데이터 타입에 따라 달라집니다.
+
+배치 크기는 [`TrainingArguments`]의 [`~TrainingArguments.per_device_train_batch_size`] 옵션으로 설정합니다.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=256,
+    per_device_eval_batch_size=256,
+)
+```
+
+성능, 입력 피처 수와 출력 뉴런 수, 배치 크기가 성능에 미치는 영향에 대해서는 NVIDIA [Performance](https://docs.nvidia.com/deeplearning/performance/dl-performance-fully-connected/index.html#input-features) 가이드를 참고하세요. 이 매개변수들은 GPU에서 실행되는 General Matrix Multiplications(GEMMs)에 사용됩니다. 매개변수가 클수록 병렬화와 효율성이 향상됩니다.
+
+데이터 타입과 GPU에 따른 최적의 배치 크기를 선택해 텐서 곱셈 속도를 극대화하려면, [Tensor Core Requirements](https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc) 섹션을 참고하는 것이 유용합니다. 그 예시로, fp16에서는 8의 배수가 권장되지만, A100 GPU에서는 64의 배수가 더 적합하다는 사실을 확인할 수 있습니다.
+
+마지막으로, 작은 매개변수를 사용할 때는 [Dimension Quantization Effects](https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#dim-quantization)를 고려하세요. 행렬 차원이 GPU 스레드 블록의 타일 크기로 나누어지지 않으면 타일 양자화가 발생하여 GPU 자원을 충분히 활용하지 못합니다. 행렬이 타일 크기로 정확히 나뉘도록 올바른 배치 크기 배수를 선택하며 학습 속도가 크게 향상됩니다.
+
+### 그레이디언트 누적[[gradient-accumulation]]
+
+그레이디언트 누적은 메모리 제약을 극복하는 방법으로, 단일 GPU에 맞지 않는 매우 큰 모델을 학습할 때 유용합니다. 이는 매개변수를 업데이트하기 전에 여러 미니 배치에 걸쳐 그레이디언트를 누적하는 방식입니다. 그 결과, 저장해야 하는 그레이디언트 수가 줄어 메모리 사용량이 줄어들고, 일반적으로 하나의 배치에서만 매개변수를 갱신하는 방식보다 더 큰 유효 배치 크기로 학습할 수 있습니다. 다만, 추가적인 순전파와 역전파가 필요하기 때문에 학습 속도가 느려질 수 있습니다.
+
+그레이디언트 누적을 활성화하려면 [`TrainingArguments`]에서 [`TrainingArguments.per_device_train_batch_size`] 옵션을 설정하세요.
+
+```py
+from transformers import TrainingArguments
+
+# 효율적인 배치 크기 64
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+)
+```
+
+학습 속도가 느려질 수 있기 때문에 그레이디언트 누적 단계를 너무 크게 설정하지 않는 것이 좋습니다. 아래 예시를 참고하세요, GPU에 담을 수 있는 최대 배치 크기가 4라면 GPU의 효율적인 사용을 위해 배치 크기를 4로 유지하는 것이 좋습니다.
+
+| 배치 크기 | 그레이디언트 누적 단계 | 효율적인 배치 크기 |     |
+| --------- | ---------------------- | ------------------ | --- |
+| 1         | 64                     | 64                 | 👎  |
+| 4         | 16                     | 64                 | 👍  |
+
+### 그레이디언트 체크포인팅[[gradient-checkpointing]]
+
+그레이디언트 체크포인팅은 역전파 과정에서 일부 중간 활성화 값만 저장하고 나머지는 다시 계산해 메모리 사용량을 줄입니다. 이를 통해 순전파 과정에서 모든 중간 활성화 값을 저장하지 않아도 되어 메모리 오버헤드를 크게 줄일 수 있습니다. 다만, 학습 속도가 약 20% 느려지는 한계가 있습니다.
+
+그레이디언트 누적을 활성화하려면 [`TrainingArguments`]에서 [`~TrainingArguments.gradient_checkpointing`] 옵션을 설정하세요.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+    gradient_checkpointing=True,
+)
+```
+
+### 혼합 정밀도[[mixed-precision]]
+
+혼합 정밀도는 일부 계산을 반정밀도(fp16)로, 나머지를 전정밀도(fp32)로 수행해 학습 속도를 높이는 기법입니다. 반정밀도 계산은 전정밀도보다 계산량이 적어 더 빠르게 수행됩니다. 한편, 전정밀도로 일부 계산을 수행하면 정확도를 유지할 수 있습니다.
+
+혼합 정밀도 학습을 위해 여러 자료형을 사용할 수 있습니다.
+
+<hfoptions id="mixed-precision">
+<hfoption id="fp16">
+
+혼합 정밀도 학습의 주요 장점은 활성화 값을 fp16으로 저장할 수 있다는 것입니다.
+
+fp16 자료형으로 혼합 정밀도 학습을 활성화하려면 [`TrainingArguments`]에서 [`~TrainingArguments.fp16`] 옵션을 설정하세요.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+    gradient_checkpointing=True,
+    fp16=True.
+)
+```
+
+fp16은 메모리 사용에 최적화된 방식이 아닙니다. 이는 fp16으로 계산된 그레이디언트가 최적화 단계에서 fp32로 다시 변환되기 때문입니다. 특히 배치 크기가 작을 때는, GPU에 두 가지 자료형(fp16, fp32)이 적재되어 있기 때문에 더 많은 GPU 메모리를 사용하게 됩니다.
+</hfoption>
+<hfoption id="bf16">
+
+[bf16](https://cloud.google.com/blog/products/ai-machine-learning/bfloat16-the-secret-to-high-performance-on-cloud-tpus)은 일부 정밀도를 포기하는 대신, 훨씬 더 넓은 동적 범위를 제공하여 오버플로와 언더플로 오류를 방지하는 데 도움이 됩니다. bf16은 fp16과 달리 손실 스케일링 기법을 추가하지 않고도 사용할 수 있습니다. bf16은 NVIDIA의 Ampere 아키텍처 이상에서 지원됩니다.
+
+bf16 자료형으로 혼합 정밀도 학습을 활성화하려면 [`TrainingArguments`]에서 [`~TrainingArguments.bf16`] 옵션을 설정하세요.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+    gradient_checkpointing=True,
+    bf16=True,
+)
+```
+
+</hfoption>
+<hfoption id="tf32">
+
+[tf32](https://blogs.nvidia.com/blog/tensorfloat-32-precision-format/)는 NVIDIA Ampere GPU에서 합성곱과 행렬곱 입력을 tf32로 변환하는 모드입니다. 다른 모든 저장과 연산은 fp32로 유지됩니다. 이를 통해 tf32는 fp32와 동일한 범위, fp16과 동일한 정밀도, 그리고 bf16보다 더 높은 정밀도를 유지할 수 있습니다. tf32를 fp16 또는 bf16 혼합 정밀도 학습과 결합하면 처리량을 16배 향상할 수 있습니다.
+
+tf32는 NVIDIA Ampere GPU에서 기본적으로 활성화되어 있지만, fp32 학습 또는 추론 코드에 아래 코드를 추가하여 명시적으로 활성화할 수도 있습니다.
+
+```py
+import torch
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
+```
+
+tf32 모드에서 혼합 정밀도 학습을 활성화하려면 [`TrainingArguments`]에서 [tf32()](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments.tf32) 옵션을 설정하세요.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+    gradient_checkpointing=True,
+    bf16=True.
+    tf32=True,
+)
+```
+
+</hfoption>
+</hfoptions>
+
+### 옵티마이저[[optimizers]]
+
+Transformers는 기본적으로 PyTorch의 [AdamW (adamw_torch)](https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html) 옵티마이저를 사용합니다. 하지만, 이 옵티마이저는 과거 그레이디언트의 가중 평균을 저장하기 때문에, 그레이디언트를 저장하기 위해 모델 매개변수 수에 비례한 추가 메모리가 필요합니다. 이는 매우 큰 모델을 학습할 때 문제가 될 수 있으며, 이러면 다른 옵티마이저를 선택하는 것을 고려해야 합니다. 예를 들어, [NVIDIA](https://github.com/NVIDIA/apex) 또는 [AMD](https://github.com/ROCm/apex)에 [Apex](https://nvidia.github.io/apex/index.html)가 설치되어 있다면, 모든 AdamW 옵티마이저 중 `adamw_apex_fused` 옵티마이저를 사용하는 것이 가장 빠른 학습 속도를 얻을 수 있습니다.
+
+옵티마이저를 선택하기 위해서는 [`TrainingArguments`]에서 [`~TrainingArguments.optim`] 옵션을 설정하세요.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+    gradient_checkpointing=True,
+    bf16=True,
+    optim="adamw_bnb_8bit"
+)
+```
+학습 시나리오에 맞게 선택할 수 있는 다양한 옵티마이저가 있습니다. (전체 지원 목록은 [OptimizerNames](https://github.com/huggingface/transformers/blob/34f4080ff59b1668d919a1ba9f8bc4a3a2a3f478/src/transformers/training_args.py#L145)를 참고하세요) 예를 들어 Adafactor는 행렬의 각 요소 대신 행 또는 열 단위의 가중 평균만 저장해 메모리 요구량을 크게 줄일 수 있지만, 수렴 속도는 느려질 수 있습니다. 또 다른 예로, bitandbytes의 [8-bit AdamW optimizer](https://huggingface.co/docs/bitsandbytes)를 사용하면 옵티마이저의 상태를 8비트로 양자화할 수 있습니다. 옵티마이저 상태는 낮은 정밀도로 저장되었다가 옵티마이저 단계에서 사용되기 전에 역 양자화됩니다.
+
+특화된 옵티마이저에 대해 더 알고 싶다면 [optimizer](./optimizers) 가이드를 참고하세요.
+
+### 데이터 사전 적재[[data-preloading]]
+
+데이터 사전 적재(Data preloading)는 GPU가 지속적으로 작업할 수 있도록 CPU에서 미리 배치 단위의 데이터를 적재하고 준비하는 기능입니다. 이를 통해 GPU 유휴 시간을 줄이고 활용도를 높일 수 있습니다. GPU가 항상 작업을 계속하도록 하려면 다음 데이터 사전 적재를 위한 두 가지 방법을 사용할 수 있습니다.
+
+1. 데이터를 저장할 고정 메모리를 CPU에 할당한 뒤, 이를 GPU로 직접 전송합니다.
+2. CPU 스레드 및 워커 수를 늘려 데이터를 더 빠르게 사전 적재합니다.
+
+고정 메모리를 할당하고 워커 수를 늘리기 위해서는 [`TrainingArguments`]에서 [`~TrainingArguments.dataloader_pin_memory`]와 [`~TrainingArguments.dataloader_num_workers`] 옵션을 설정하세요.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+    gradient_checkpointing=True,
+    bf16=True,
+    optim="adamw_bnb_8bit",
+    dataloader_pin_memory=True,
+    dataloader_num_workers=4,
+)
+```
+
+## PyTorch[[pytorch]]
+
+PyTorch는 메모리 요구사항을 줄이고 학습 속도를 높이기 위한 여러 기능을 제공합니다. 이러한 기능들은 Transformers에서 몇 줄의 코드만 추가하여 활성화할 수 있습니다.
+
+### torch.empty_cache_steps[[torchemptycachesteps]]
+
+[torch.cuda.empty_cache](https://pytorch.org/docs/stable/generated/torch.cuda.empty_cache.html#torch.cuda.empty_cache) 함수는 사용하지 않는 캐시 메모리를 해제하여 메모리 부족(OOM) 오류를 방지할 수 있지만, 학습 속도가 약 10% 느려질 수 있습니다.
+
+특정 학습 단계 이후에 이 기능을 활성화하고 싶다면, [`TrainingArguments`]에서 [torch_empty_cache_steps()](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments.torch_empty_cache_steps)를 설정하세요.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+    gradient_checkpointing=True,
+    bf16=True,
+    optim="adamw_bnb_8bit",
+    dataloader_pin_memory=True,
+    dataloader_num_workers=4,
+    torch_empty_cache_steps=4,
+)
+```
+
+### torch.compile[[torchcompile]]
+
+[torch.compile](https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html)은 PyTorch 코드를 최적화된 커널로 컴파일해 학습 속도를 크게 높여줍니다. 이 기능은 TorchDynamo를 사용해 프레임 평가 API로부터 PyTorch 그래프를 캡처하며, 이렇게 캡처한 그래프는 다양한 백엔드에 추가로 최적화된 커널로 컴파일될 수 있습니다.
+
+이를 활성화하려면 [`TrainingArguments`]에서 [`~TrainingArguments.torch_compile`]를 설정하세요. 백엔드는 [torch_compile_backend()](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments.torch_compile_backend)를 통해 선택할 수 있습니다.
+
+```py
+from transformers import TrainingArguments
+
+args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=16,
+    gradient_checkpointing=True,
+    bf16=True,
+    optim="adamw_bnb_8bit",
+    dataloader_pin_memory=True,
+    dataloader_num_workers=4,
+    torch_empty_cache_steps=4,
+    torch_compile=True,
+    torch_compile_backend="inductor"
+)
+```
+
+아래 표를 참고하여 학습 시나리오에 적합한 백엔드를 선택하세요.
+
+| 백엔드         | 설명                                                                                                                                                                   | 목표         |
+| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------ |
+| eager          | PyTorch를 사용해 추출된 GraphModule을 실행합니다                                                                                                                       | 디버깅       |
+| aot_eager      | AOTAutograd로 추출된 순전파 및 역전파 그래프를 Pytorch eager 모드로 실행합니다                                                                                         | 디버깅       |
+| inductor       | Triton 커널을 활용하는 TorchInductor와 AOTAutograd, CUDA Graphs를 사용합니다                                                                                           | 학습 및 추론 |
+| nvfuser        | TorchScript와 함께 nvFuser를 사용합니다                                                                                                                                | 학습 및 추론 |
+| aot_nvfuser    | AOTAutograd와 함께 nvFuser를 사용합니다                                                                                                                                | 학습 및 추론 |
+| aot_cudagraphs | AOTAutograd와 함께 CUDA Graphs를 사용합니다                                                                                                                            | 학습 및 추론 |
+| ofi            | TorchScripts의 [optimize_for_inference](https://pytorch.org/docs/stable/generated/torch.jit.optimize_for_inference.html#torch-jit-optimize-for-inference)를 사용합니다 | 추론         |
+| fx2trt         | [Torch-TensorRT](https://pytorch.org/TensorRT/tutorials/getting_started_with_fx_path.html)를 사용합니다                                                                | 추론         |
+| onnxrt         | CPU 및 GPU 추론을 위해 [ONNX-RT](https://onnxruntime.ai/)를 사용합니다                                                                                                 | 추론         |
+| ipex           | CPU 추론을 위해 [IPEX](https://github.com/intel/intel-extension-for-pytorch)를 사용합니다                                                                              | 추론         |
+
+### 스케일된 내적 어텐션[[scaled-dot-production-attention]]
+
+[torch.nn.functional.scaled_dot_product_attention](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html) (SDPA)는 스케일된 내적 어텐션 메커니즘을 PyTorch에 내장해 구현한 함수입니다. SDPA는 트랜스포머 모델의 기존 어텐션 메커니즘보다 더 효율적이고 최적화되어 있습니다. 세 가지 유형의 스케일된 내적 어텐션을 지원합니다.
+
+- [FlashAttention2](https://github.com/Dao-AILab/flash-attention)는 fp16 또는 bf16 torch 타입 모델에서 자동으로 활성화됩니다. 먼저 모델을 적절한 타입으로 캐스팅했는지 확인하세요.
+- [xFormers](https://github.com/facebookresearch/xformers) 또는 Memory-Efficient Attention은 fp32 torch 타입 모델을 지원합니다.
+- C++로 구현된 스케일된 내적 어텐션입니다.
+
+SDPA는 PyTorch 2.1.1 버전 이상에서 기본적으로 활성화되어 있지만, [`~PreTrainedModel.from_pretrained`]에서 `attn_implementation="sdpa"`를 설정해 명시적으로 활성화할 수 있습니다.
+
+```py
+from transformers import AutoModelForCausalLM
+
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B", device_map="auto", attn_implementation="sdpa")
+```
--- a/docs/source/ko/perf_train_tpu_tf.md
+++ b/docs/source/ko/perf_train_tpu_tf.md
@ -1,162 +0,0 @@
-<!--Copyright 2023 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# TensorFlow로 TPU에서 훈련하기[[training-on-tpu-with-tensorflow]]
-
-<Tip>
-
-자세한 설명이 필요하지 않고 바로 TPU 샘플 코드를 시작하고 싶다면 [우리의 TPU 예제 노트북!](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tpu_training-tf.ipynb)을 확인하세요.
-
-</Tip>
-
-### TPU가 무엇인가요?[[what-is-a-tpu]]
-
-TPU는 **텐서 처리 장치**입니다. Google에서 설계한 하드웨어로, GPU처럼 신경망 내에서 텐서 연산을 더욱 빠르게 처리하기 위해 사용됩니다. 네트워크 훈련과 추론 모두에 사용할 수 있습니다. 일반적으로 Google의 클라우드 서비스를 통해 이용할 수 있지만, Google Colab과 Kaggle Kernel을 통해 소규모 TPU를 무료로 직접 이용할 수도 있습니다.
-
-[🤗 Transformers의 모든 Tensorflow 모델은 Keras 모델](https://huggingface.co/blog/tensorflow-philosophy)이기 때문에, 이 문서에서 다루는 대부분의 메소드는 대체로 모든 Keras 모델을 위한 TPU 훈련에 적용할 수 있습니다! 하지만 Transformer와 데이터 세트의 HuggingFace 생태계(hug-o-system?)에 특화된 몇 가지 사항이 있으며, 해당 사항에 대해 설명할 때 반드시 언급하도록 하겠습니다.
-
-### 어떤 종류의 TPU가 있나요?[[what-kinds-of-tpu-are-available]]
-
-신규 사용자는 TPU의 범위와 다양한 이용 방법에 대해 매우 혼란스러워하는 경우가 많습니다. **TPU 노드**와 **TPU VM**의 차이점은 가장 먼저 이해해야 할 핵심적인 구분 사항입니다.
-
-**TPU 노드**를 사용한다면, 실제로는 원격 TPU를 간접적으로 이용하는 것입니다. 네트워크와 데이터 파이프라인을 초기화한 다음, 이를 원격 노드로 전달할 별도의 VM이 필요합니다. Google Colab에서 TPU를 사용하는 경우, **TPU 노드** 방식으로 이용하게 됩니다.
-
-TPU 노드를 사용하는 것은 이를 사용하지 않는 사용자에게 예기치 않은 현상이 발생하기도 합니다! 특히, TPU는 파이썬 코드를 실행하는 기기(machine)와 물리적으로 다른 시스템에 있기 때문에 로컬 기기에 데이터를 저장할 수 없습니다. 즉, 컴퓨터의 내부 저장소에서 가져오는 데이터 파이프라인은 절대 작동하지 않습니다! 로컬 기기에 데이터를 저장하는 대신에, 데이터 파이프라인이 원격 TPU 노드에서 실행 중일 때에도 데이터 파이프라인이 계속 이용할 수 있는 Google Cloud Storage에 데이터를 저장해야 합니다.
-
-<Tip>
-
-메모리에 있는 모든 데이터를 `np.ndarray` 또는 `tf.Tensor`로 맞출 수 있다면, Google Cloud Storage에 업로드할 필요 없이, Colab 또는 TPU 노드를 사용해서 해당 데이터에 `fit()` 할 수 있습니다.
-
-</Tip>
-
-<Tip>
-
-**🤗특수한 Hugging Face 팁🤗:** TF 코드 예제에서 볼 수 있는 `Dataset.to_tf_dataset()` 메소드와 그 상위 래퍼(wrapper)인 `model.prepare_tf_dataset()`는 모두 TPU 노드에서 작동하지 않습니다. 그 이유는 `tf.data.Dataset`을 생성하더라도 “순수한” `tf.data` 파이프라인이 아니며 `tf.numpy_function` 또는 `Dataset.from_generator()`를 사용하여 기본 HuggingFace `Dataset`에서 데이터를 전송하기 때문입니다. 이 HuggingFace `Dataset`는 로컬 디스크에 있는 데이터로 지원되며 원격 TPU 노드가 읽을 수 없습니다.
-
-</Tip>
-
-TPU를 이용하는 두 번째 방법은 **TPU VM**을 사용하는 것입니다. TPU VM을 사용할 때, GPU VM에서 훈련하는 것과 같이 TPU가 장착된 기기에 직접 연결합니다. 특히 데이터 파이프라인과 관련하여, TPU VM은 대체로 작업하기 더 쉽습니다. 위의 모든 경고는 TPU VM에는 해당되지 않습니다!
-
-이 문서는 의견이 포함된 문서이며, 저희의 의견이 여기에 있습니다: **가능하면 TPU 노드를 사용하지 마세요.** TPU 노드는 TPU VM보다 더 복잡하고 디버깅하기가 더 어렵습니다. 또한 향후에는 지원되지 않을 가능성이 높습니다. Google의 최신 TPU인 TPUv4는 TPU VM으로만 이용할 수 있으므로, TPU 노드는 점점 더 "구식" 이용 방법이 될 것으로 전망됩니다. 그러나 TPU 노드를 사용하는 Colab과 Kaggle Kernel에서만 무료 TPU 이용이 가능한 것으로 확인되어, 필요한 경우 이를 다루는 방법을 설명해 드리겠습니다! 이에 대한 자세한 설명이 담긴 코드 샘플은 [TPU 예제 노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tpu_training-tf.ipynb)에서 확인하시기 바랍니다.
-
-### 어떤 크기의 TPU를 사용할 수 있나요?[[what-sizes-of-tpu-are-available]]
-
-단일 TPU(v2-8/v3-8/v4-8)는 8개의 복제본(replicas)을 실행합니다. TPU는 수백 또는 수천 개의 복제본을 동시에 실행할 수 있는 **pod**로 존재합니다. 단일 TPU를 하나 이상 사용하지만 전체 Pod보다 적게 사용하는 경우(예를 들면, v3-32), TPU 구성을 **pod 슬라이스**라고 합니다.
-
-Colab을 통해 무료 TPU에 이용하는 경우, 기본적으로 단일 v2-8 TPU를 제공받습니다.
-
-### XLA에 대해 들어본 적이 있습니다. XLA란 무엇이고 TPU와 어떤 관련이 있나요?[[i-keep-hearing-about-this-xla-thing-whats-xla-and-how-does-it-relate-to-tpus]]
-
-XLA는 최적화 컴파일러로, TensorFlow와 JAX에서 모두 사용됩니다. JAX에서는 유일한 컴파일러이지만, TensorFlow에서는 선택 사항입니다(하지만 TPU에서는 필수입니다!). Keras 모델을 훈련할 때 이를 활성화하는 가장 쉬운 방법은 `jit_compile=True` 인수를 `model.compile()`에 전달하는 것입니다. 오류가 없고 성능이 양호하다면, TPU로 전환할 준비가 되었다는 좋은 신호입니다!
-
-TPU에서 디버깅하는 것은 대개 CPU/GPU보다 조금 더 어렵기 때문에, TPU에서 시도하기 전에 먼저 XLA로 CPU/GPU에서 코드를 실행하는 것을 권장합니다. 물론 오래 학습할 필요는 없습니다. 즉, 모델과 데이터 파이프라인이 예상대로 작동하는지 확인하기 위해 몇 단계만 거치면 됩니다.
-
-<Tip>
-
-XLA로 컴파일된 코드는 대체로 더 빠릅니다. 따라서 TPU에서 실행할 계획이 없더라도, `jit_compile=True`를 추가하면 성능이 향상될 수 있습니다. 하지만 XLA 호환성에 대한 아래 주의 사항을 반드시 확인하세요!
-
-</Tip>
-
-<Tip warning={true}>
-
-**뼈아픈 경험에서 얻은 팁:** `jit_compile=True`를 사용하면 속도를 높이고 CPU/GPU 코드가 XLA와 호환되는지 검증할 수 있는 좋은 방법이지만, 실제 TPU에서 훈련할 때 그대로 남겨두면 많은 문제를 초래할 수 있습니다. XLA 컴파일은 TPU에서 암시적으로 이뤄지므로, 실제 TPU에서 코드를 실행하기 전에 해당 줄을 제거하는 것을 잊지 마세요!
-
-</Tip>
-
-### 제 XLA 모델과 호환하려면 어떻게 해야 하나요?[[how-do-i-make-my-model-xla-compatible]]
-
-대부분의 경우, 여러분의 코드는 이미 XLA와 호환될 것입니다! 그러나 표준 TensorFlow에서 작동하지만, XLA에서는 작동하지 않는 몇 가지 사항이 있습니다. 이를 아래 세 가지 핵심 규칙으로 간추렸습니다:
-
-<Tip>
-
-**특수한 HuggingFace 팁🤗:** 저희는 TensorFlow 모델과 손실 함수를 XLA와 호환되도록 재작성하는 데 많은 노력을 기울였습니다. 저희의 모델과 손실 함수는 대개 기본적으로 규칙 #1과 #2를 따르므로 `transformers` 모델을 사용하는 경우, 이를 건너뛸 수 있습니다. 하지만 자체 모델과 손실 함수를 작성할 때는 이러한 규칙을 잊지 마세요!
-
-</Tip>
-
-#### XLA 규칙 #1: 코드에서 “데이터 종속 조건문”을 사용할 수 없습니다[[xla-rule-1-your-code-cannot-have-datadependent-conditionals]]
-
-어떤 `if`문도 `tf.Tensor` 내부의 값에 종속될 수 없다는 것을 의미합니다. 예를 들어, 이 코드 블록은 XLA로 컴파일할 수 없습니다!
-
-```python
-if tf.reduce_sum(tensor) > 10:
-    tensor = tensor / 2.0
-```
-
-처음에는 매우 제한적으로 보일 수 있지만, 대부분의 신경망 코드에서는 이를 수행할 필요가 없습니다. `tf.cond`를 사용하거나([여기](https://www.tensorflow.org/api_docs/python/tf/cond) 문서를 참조), 다음과 같이 조건문을 제거하고 대신 지표 변수를 사용하는 영리한 수학 트릭을 찾아내어 이 제한을 우회할 수 있습니다:
-
-```python
-sum_over_10 = tf.cast(tf.reduce_sum(tensor) > 10, tf.float32)
-tensor = tensor / (1.0 + sum_over_10)
-```
-
-이 코드는 위의 코드와 정확히 동일한 효과를 구현하지만, 조건문을 제거하여 문제 없이 XLA로 컴파일되도록 합니다!
-
-#### XLA 규칙 #2: 코드에서 "데이터 종속 크기"를 가질 수 없습니다[[xla-rule-2-your-code-cannot-have-datadependent-shapes]]
-
-코드에서 모든 `tf.Tensor` 객체의 크기가 해당 값에 종속될 수 없다는 것을 의미합니다. 예를 들어, `tf.unique` 함수는 입력에서 각 고유 값의 인스턴스 하나를 포함하는 `tensor`를 반환하기 때문에 XLA로 컴파일할 수 없습니다. 이 출력의 크기는 입력 `Tensor`가 얼마나 반복적인지에 따라 분명히 달라질 것이므로, XLA는 이를 처리하지 못합니다!
-
-일반적으로, 대부분의 신경망 코드는 기본값으로 규칙 2를 따릅니다. 그러나 문제가 되는 몇 가지 대표적인 사례가 있습니다. 가장 흔한 사례 중 하나는 **레이블 마스킹**을 사용하여 손실(loss)을 계산할 때, 해당 위치를 무시하도록 나타내기 위해 레이블을 음수 값으로 설정하는 경우입니다. 레이블 마스킹을 지원하는 NumPy나 PyTorch 손실 함수를 보면 [불 인덱싱](https://numpy.org/doc/stable/user/basics.indexing.html#boolean-array-indexing)을 사용하는 다음과 같은 코드를 자주 접할 수 있습니다:
-
-```python
-label_mask = labels >= 0
-masked_outputs = outputs[label_mask]
-masked_labels = labels[label_mask]
-loss = compute_loss(masked_outputs, masked_labels)
-mean_loss = torch.mean(loss)
-```
-
-이 코드는 NumPy나 PyTorch에서는 문제 없이 작동하지만, XLA에서는 손상됩니다! 왜 그럴까요? 얼마나 많은 위치가 마스킹되는지에 따라 `masked_outputs`와 `masked_labels`의 크기가 달라져서, **데이터 종속 크기**가 되기 때문입니다. 그러나 규칙 #1과 마찬가지로, 이 코드를 다시 작성하면 데이터 종속적 모양 크기가 정확히 동일한 출력을 산출할 수 있습니다.
-
-```python
-label_mask = tf.cast(labels >= 0, tf.float32)
-loss = compute_loss(outputs, labels)
-loss = loss * label_mask  # Set negative label positions to 0
-mean_loss = tf.reduce_sum(loss) / tf.reduce_sum(label_mask)
-```
-
-여기서, 모든 위치에 대한 손실을 계산하지만, 평균을 계산할 때 분자와 분모 모두에서 마스크된 위치를 0으로 처리합니다. 이는 데이터 종속 크기를 방지하고 XLA 호환성을 유지하면서 첫 번째 블록과 정확히 동일한 결과를 산출합니다. 규칙 #1에서와 동일한 트릭을 사용하여 `tf.bool`을 `tf.float32`로 변환하고 이를 지표 변수로 사용합니다. 해당 트릭은 매우 유용하며, 자체 코드를 XLA로 변환해야 할 경우 기억해 두세요!
-
-#### XLA 규칙 #3: XLA는 각기 다른 입력 크기가 나타날 때마다 모델을 다시 컴파일해야 합니다[[xla-rule-3-xla-will-need-to-recompile-your-model-for-every-different-input-shape-it-sees]]
-
-이것은 가장 큰 문제입니다. 입력 크기가 매우 가변적인 경우, XLA는 모델을 반복해서 다시 컴파일해야 하므로 성능에 큰 문제가 발생할 수 있습니다. 이 문제는 토큰화 후 입력 텍스트의 길이가 가변적인 NLP 모델에서 주로 발생합니다. 다른 모달리티에서는 정적 크기가 더 흔하며, 해당 규칙이 훨씬 덜 문제시 됩니다.
-
-규칙 #3을 어떻게 우회할 수 있을까요? 핵심은 **패딩**입니다. 모든 입력을 동일한 길이로 패딩한 다음, `attention_mask`를 사용하면 어떤 XLA 문제도 없이 가변 크기에서 가져온 것과 동일한 결과를 가져올 수 있습니다. 그러나 과도한 패딩은 심각한 속도 저하를 야기할 수도 있습니다. 모든 샘플을 전체 데이터 세트의 최대 길이로 패딩하면, 무한한 패딩 토큰으로 구성된 배치가 생성되어 많은 연산과 메모리가 낭비될 수 있습니다!
-
-이 문제에 대한 완벽한 해결책은 없습니다. 하지만, 몇 가지 트릭을 시도해볼 수 있습니다. 한 가지 유용한 트릭은 **샘플 배치를 32 또는 64 토큰과 같은 숫자의 배수까지 패딩하는 것입니다.** 이는 토큰 수가 소폭 증가하지만, 모든 입력 크기가 32 또는 64의 배수여야 하기 때문에 고유한 입력 크기의 수가 대폭 줄어듭니다. 고유한 입력 크기가 적다는 것은 XLA 컴파일 횟수가 적어진다는 것을 의미합니다!
-
-<Tip>
-
-**🤗특수한 HuggingFace 팁🤗:** 토크나이저와 데이터 콜레이터에 도움이 될 수 있는 메소드가 있습니다. 토크나이저를 불러올 때 `padding="max_length"` 또는 `padding="longest"`를 사용하여 패딩된 데이터를 출력하도록 할 수 있습니다. 토크나이저와 데이터 콜레이터는 나타나는 고유한 입력 크기의 수를 줄이기 위해 사용할 수 있는 `pad_to_multiple_of` 인수도 있습니다!
-
-</Tip>
-
-### 실제 TPU로 모델을 훈련하려면 어떻게 해야 하나요?[[how-do-i-actually-train-my-model-on-tpu]]
-
-훈련이 XLA와 호환되고 (TPU 노드/Colab을 사용하는 경우) 데이터 세트가 적절하게 준비되었다면, TPU에서 실행하는 것은 놀랍도록 쉽습니다! 코드에서 몇 줄만 추가하여, TPU를 초기화하고 모델과 데이터 세트가 `TPUStrategy` 범위 내에 생성되도록 변경하면 됩니다. [우리의 TPU 예제 노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tpu_training-tf.ipynb)을 참조하여 실제로 작동하는 모습을 확인해 보세요!
-
-### 요약[[summary]]
-
-여기에 많은 내용이 포함되어 있으므로, TPU 훈련을 위한 모델을 준비할 때 따를 수 있는 간략한 체크리스트로 요약해 보겠습니다:
-
- 코드가 XLA의 세 가지 규칙을 따르는지 확인합니다.
- CPU/GPU에서 `jit_compile=True`로 모델을 컴파일하고 XLA로 훈련할 수 있는지 확인합니다.
- 데이터 세트를 메모리에 가져오거나 TPU 호환 데이터 세트를 가져오는 방식을 사용합니다([노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tpu_training-tf.ipynb) 참조)
- 코드를 Colab(accelerator가 “TPU”로 설정됨) 또는 Google Cloud의 TPU VM으로 마이그레이션합니다.
- TPU 초기화 코드를 추가합니다([노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tpu_training-tf.ipynb) 참조)
- `TPUStrategy`를 생성하고 데이터 세트를 가져오는 것과 모델 생성이 `strategy.scope()` 내에 있는지 확인합니다([노트북](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tpu_training-tf.ipynb) 참조)
- TPU로 이동할 때 `jit_compile=True`를 다시 설정하는 것을 잊지 마세요!
- 🙏🙏🙏🥺🥺🥺
- model.fit()을 불러옵니다.
- 여러분이 해냈습니다!
--- a/docs/source/ko/performance.md
+++ b/docs/source/ko/performance.md
@ -1,96 +0,0 @@
-<!---
-Copyright 2021 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# 성능 및 확장성 [[performance-and-scalability]]
-
-점점 더 큰 규모의 트랜스포머 모델을 훈련하고 프로덕션에 배포하는 데에는 다양한 어려움이 따릅니다. 훈련 중에는 모델이 사용 가능한 GPU 메모리보다 더 많은 메모리를 필요로 하거나 훈련 속도가 매우 느릴 수 있으며, 추론을 위해 배포할 때는 제품 환경에서 요구되는 처리량으로 인해 과부하가 발생할 수 있습니다. 이 문서는 이러한 문제를 극복하고 사용 사례에 가장 적합한 설정을 찾도록 도움을 주기 위해 설계되었습니다. 훈련과 추론으로 가이드를 분할했는데, 이는 각각 다른 문제와 해결 방법이 있기 때문입니다. 그리고 각 가이드에는 다양한 종류의 하드웨어 설정에 대한 별도의 가이드가 있습니다(예: 훈련을 위한 단일 GPU vs 다중 GPU 또는 추론을 위한 CPU vs GPU).
-
-![perf_overview](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/perf_overview.png)
-
-이 문서는 사용자의 상황에 유용할 수 있는 방법들에 대한 개요 및 시작점 역할을 합니다.
-
-## 훈련 [[training]]
-
-효율적인 트랜스포머 모델 훈련에는 GPU나 TPU와 같은 가속기가 필요합니다. 가장 일반적인 경우는 단일 GPU만 사용하는 경우지만, 다중 GPU 및 CPU 훈련에 대한 섹션도 있습니다(곧 더 많은 내용이 추가될 예정).
-
-<Tip>
-
- 참고: 단일 GPU 섹션에서 소개된 대부분의 전략(예: 혼합 정밀도 훈련 또는 그라디언트 누적)은 일반적인 모델 훈련에도 적용되므로, 다중 GPU나 CPU 훈련과 같은 섹션을 살펴보기 전에 꼭 참고하시길 바랍니다.
-
-</Tip>
-
-### 단일 GPU [[single-gpu]]
-
-단일 GPU에서 대규모 모델을 훈련하는 것은 어려울 수 있지만, 이를 가능하게 하는 여러 가지 도구와 방법이 있습니다. 이 섹션에서는 혼합 정밀도 훈련, 그라디언트 누적 및 체크포인팅, 효율적인 옵티마이저, 최적의 배치 크기를 결정하기 위한 전략 등에 대해 논의합니다.
-
-[단일 GPU 훈련 섹션으로 이동](perf_train_gpu_one)
-
-### 다중 GPU [[multigpu]]
-
-단일 GPU에서 훈련하는 것이 너무 느리거나 대규모 모델에 적합하지 않은 경우도 있습니다. 다중 GPU 설정으로 전환하는 것은 논리적인 단계이지만, 여러 GPU에서 한 번에 훈련하려면 각 GPU마다 모델의 전체 사본을 둘지, 혹은 모델 자체도 여러 GPU에 분산하여 둘지 등 새로운 결정을 내려야 합니다. 이 섹션에서는 데이터, 텐서 및 파이프라인 병렬화에 대해 살펴봅니다.
-
-[다중 GPU 훈련 섹션으로 이동](perf_train_gpu_many)
-
-### CPU [[cpu]]
-
-
-[CPU 훈련 섹션으로 이동](perf_train_cpu)
-
-
-### TPU [[tpu]]
-
-[_곧 제공될 예정_](perf_train_tpu)
-
-### 특수한 하드웨어 [[specialized-hardware]]
-
-[_곧 제공될 예정_](perf_train_special)
-
-## 추론 [[inference]]
-
-제품 및 서비스 환경에서 대규모 모델을 효율적으로 추론하는 것은 모델을 훈련하는 것만큼 어려울 수 있습니다. 이어지는 섹션에서는 CPU 및 단일/다중 GPU 설정에서 추론을 진행하는 단계를 살펴봅니다.
-
-### CPU [[cpu]]
-
-[CPU 추론 섹션으로 이동](perf_infer_cpu)
-
-### 단일 GPU [[single-gpu]]
-
-[단일 GPU 추론 섹션으로 이동](perf_infer_gpu_one)
-
-### 다중 GPU [[multigpu]]
-
-[다중 GPU 추론 섹션으로 이동](perf_infer_gpu_many)
-
-### 특수한 하드웨어 [[specialized-hardware]]
-
-[_곧 제공될 예정_](perf_infer_special)
-
-## 하드웨어 [[hardware]]
-
-하드웨어 섹션에서는 자신만의 딥러닝 장비를 구축할 때 유용한 팁과 요령을 살펴볼 수 있습니다.
-
-[하드웨어 섹션으로 이동](perf_hardware)
-
-
-## 기여하기 [[contribute]]
-
-이 문서는 완성되지 않은 상태이며, 추가해야 할 내용이나 수정 사항이 많이 있습니다. 따라서 추가하거나 수정할 내용이 있으면 주저하지 말고 PR을 열어 주시거나, 자세한 내용을 논의하기 위해 Issue를 시작해 주시기 바랍니다.
-
-A가 B보다 좋다고 하는 기여를 할 때는, 재현 가능한 벤치마크와/또는 해당 정보의 출처 링크를 포함해주세요(당신으로부터의 직접적인 정보가 아닌 경우).
--- a/docs/source/ko/pipeline_gradio.md
+++ b/docs/source/ko/pipeline_gradio.md
@ -0,0 +1,52 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# 머신러닝 앱 [[machine-learning-apps]]
+
+머신러닝 앱을 빠르고 쉽게 구축하고 공유할 수 있는 라이브러리인 [Gradio](https://www.gradio.app/)는 [`Pipeline`]과 통합되어 추론을 위한 간단한 인터페이스를 빠르게 생성할 수 있습니다.
+
+시작하기 전에 Gradio가 설치되어 있는지 확인하세요.
+
+```py
+!pip install gradio
+```
+
+원하는 작업에 맞는 pipeline을 생성한 다음, Gradio의 [Interface.from_pipeline](https://www.gradio.app/docs/gradio/interface#interface-from_pipeline) 함수에 전달하여 인터페이스를 만드세요. Gradio는 [`Pipeline`]에 맞는 입력 및 출력 컴포넌트를 자동으로 결정합니다.
+
+[launch](https://www.gradio.app/main/docs/gradio/blocks#blocks-launch)를 추가하여 웹 서버를 생성하고 앱을 시작하세요.
+
+```py
+from transformers import pipeline
+import gradio as gr
+
+pipeline = pipeline("image-classification", model="google/vit-base-patch16-224")
+gr.Interface.from_pipeline(pipeline).launch()
+```
+
+웹 앱은 기본적으로 로컬 서버에서 실행됩니다. 다른 사용자와 앱을 공유하려면 [launch](https://www.gradio.app/main/docs/gradio/blocks#blocks-launch)에서 `share=True`로 설정하여 임시 공개 링크를 생성하세요. 더 지속적인 솔루션을 원한다면 Hugging Face [Spaces](https://hf.co/spaces)에서 앱을 호스팅하세요.
+
+```py
+gr.Interface.from_pipeline(pipeline).launch(share=True)
+```
+
+아래 Space는 위 코드를 사용하여 생성되었으며, Spaces에서 호스팅됩니다.
+
+<iframe
+	src="https://stevhliu-gradio-pipeline-demo.hf.space"
+	frameborder="0"
+	width="850"
+	height="850"
+></iframe>
--- a/docs/source/ko/preprocessing.md
+++ b/docs/source/ko/preprocessing.md
@ -1,539 +0,0 @@
-<!--Copyright 2023 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# 전처리[[preprocess]]
-
-[[open-in-colab]]
-
-모델을 훈련하려면 데이터 세트를 모델에 맞는 입력 형식으로 전처리해야 합니다. 텍스트, 이미지 또는 오디오인지 관계없이 데이터를 텐서 배치로 변환하고 조립할 필요가 있습니다. 🤗 Transformers는 모델에 대한 데이터를 준비하는 데 도움이 되는 일련의 전처리 클래스를 제공합니다. 이 튜토리얼에서는 다음 내용을 배울 수 있습니다:
-
-* 텍스트는 [Tokenizer](./main_classes/tokenizer)를 사용하여 토큰 시퀀스로 변환하고 토큰의 숫자 표현을 만든 후 텐서로 조립합니다.
-* 음성 및 오디오는 [Feature extractor](./main_classes/feature_extractor)를 사용하여 오디오 파형에서 시퀀스 특성을 파악하여 텐서로 변환합니다.
-* 이미지 입력은 [ImageProcessor](./main_classes/image)을 사용하여 이미지를 텐서로 변환합니다.
-* 멀티모달 입력은 [Processor](./main_classes/processors)을 사용하여 토크나이저와 특성 추출기 또는 이미지 프로세서를 결합합니다.
-
-<Tip>
-
-`AutoProcessor`는 **언제나** 작동하여 토크나이저, 이미지 프로세서, 특성 추출기 또는 프로세서 등 사용 중인 모델에 맞는 클래스를 자동으로 선택합니다.
-
-</Tip>
-
-시작하기 전에 🤗 Datasets를 설치하여 실험에 사용할 데이터를 불러올 수 있습니다:
-
-```bash
-pip install datasets
-```
-
-## 자연어처리[[natural-language-processing]]
-
-<Youtube id="Yffk5aydLzg"/>
-
-텍스트 데이터를 전처리하기 위한 기본 도구는 [tokenizer](main_classes/tokenizer)입니다. 토크나이저는 일련의 규칙에 따라 텍스트를 *토큰*으로 나눕니다. 토큰은 숫자로 변환되고 텐서는 모델 입력이 됩니다. 모델에 필요한 추가 입력은 토크나이저에 의해 추가됩니다.
-
-<Tip>
-
-사전훈련된 모델을 사용할 계획이라면 모델과 함께 사전훈련된 토크나이저를 사용하는 것이 중요합니다. 이렇게 하면 텍스트가 사전훈련 말뭉치와 동일한 방식으로 분할되고 사전훈련 중에 동일한 해당 토큰-인덱스 쌍(일반적으로 *vocab*이라고 함)을 사용합니다.
-
-</Tip>
-
-시작하려면 [`AutoTokenizer.from_pretrained`] 메소드를 사용하여 사전훈련된 토크나이저를 불러오세요. 모델과 함께 사전훈련된 *vocab*을 다운로드합니다:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
-```
-
-그 다음으로 텍스트를 토크나이저에 넣어주세요:
-
-```py
->>> encoded_input = tokenizer("Do not meddle in the affairs of wizards, for they are subtle and quick to anger.")
->>> print(encoded_input)
-{'input_ids': [101, 2079, 2025, 19960, 10362, 1999, 1996, 3821, 1997, 16657, 1010, 2005, 2027, 2024, 11259, 1998, 4248, 2000, 4963, 1012, 102],
- 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
- 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
-```
-
-토크나이저는 세 가지 중요한 항목을 포함한 딕셔너리를 반환합니다:
-
-* [input_ids](glossary#input-ids)는 문장의 각 토큰에 해당하는 인덱스입니다.
-* [attention_mask](glossary#attention-mask)는 토큰을 처리해야 하는지 여부를 나타냅니다.
-* [token_type_ids](glossary#token-type-ids)는 두 개 이상의 시퀀스가 있을 때 토큰이 속한 시퀀스를 식별합니다.
-
-`input_ids`를 디코딩하여 입력을 반환합니다:
-
-```py
->>> tokenizer.decode(encoded_input["input_ids"])
-'[CLS] Do not meddle in the affairs of wizards, for they are subtle and quick to anger. [SEP]'
-```
-
-토크나이저가 두 개의 특수한 토큰(분류 토큰 `CLS`와 분할 토큰 `SEP`)을 문장에 추가했습니다.
-모든 모델에 특수한 토큰이 필요한 것은 아니지만, 필요하다면 토크나이저가 자동으로 추가합니다.
-
-전처리할 문장이 여러 개 있는 경우에는 리스트로 토크나이저에 전달합니다:
-
-```py
->>> batch_sentences = [
-...     "But what about second breakfast?",
-...     "Don't think he knows about second breakfast, Pip.",
-...     "What about elevensies?",
-... ]
->>> encoded_inputs = tokenizer(batch_sentences)
->>> print(encoded_inputs)
-{'input_ids': [[101, 1252, 1184, 1164, 1248, 6462, 136, 102],
-               [101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
-               [101, 1327, 1164, 5450, 23434, 136, 102]],
- 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0],
-                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                    [0, 0, 0, 0, 0, 0, 0]],
- 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1],
-                    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-                    [1, 1, 1, 1, 1, 1, 1]]}
-```
-
-### 패딩[[pad]]
-
-모델 입력인 텐서는 모양이 균일해야 하지만, 문장의 길이가 항상 같지는 않기 때문에 문제가 될 수 있습니다. 패딩은 짧은 문장에 특수한 *패딩 토큰*을 추가하여 텐서를 직사각형 모양이 되도록 하는 전략입니다.
-
-`padding` 매개변수를 `True`로 설정하여 배치 내의 짧은 시퀀스를 가장 긴 시퀀스에 맞춰 패딩합니다.
-
-```py
->>> batch_sentences = [
-...     "But what about second breakfast?",
-...     "Don't think he knows about second breakfast, Pip.",
-...     "What about elevensies?",
-... ]
->>> encoded_input = tokenizer(batch_sentences, padding=True)
->>> print(encoded_input)
-{'input_ids': [[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0],
-               [101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
-               [101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]],
- 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
- 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
-                    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-                    [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]]}
-```
-
-길이가 짧은 첫 문장과 세 번째 문장이 이제 `0`으로 채워졌습니다.
-
-### 잘라내기[[truncation]]
-
-한편, 때로는 시퀀스가 모델에서 처리하기에 너무 길 수도 있습니다. 이 경우, 시퀀스를 더 짧게 줄일 필요가 있습니다.
-
-모델에서 허용하는 최대 길이로 시퀀스를 자르려면 `truncation` 매개변수를 `True`로 설정하세요:
-
-```py
->>> batch_sentences = [
-...     "But what about second breakfast?",
-...     "Don't think he knows about second breakfast, Pip.",
-...     "What about elevensies?",
-... ]
->>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True)
->>> print(encoded_input)
-{'input_ids': [[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0],
-               [101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
-               [101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]],
- 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
- 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
-                    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-                    [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]]}
-```
-
-<Tip>
-
-다양한 패딩과 잘라내기 인수에 대해 더 알아보려면 [패딩과 잘라내기](./pad_truncation) 개념 가이드를 확인해보세요.
-
-</Tip>
-
-### 텐서 만들기[[build-tensors]]
-
-마지막으로, 토크나이저가 모델에 공급되는 실제 텐서를 반환하도록 합니다.
-
-`return_tensors` 매개변수를 PyTorch의 경우 `pt`, TensorFlow의 경우 `tf`로 설정하세요:
-
-<frameworkcontent>
-<pt>
-
-```py
->>> batch_sentences = [
-...     "But what about second breakfast?",
-...     "Don't think he knows about second breakfast, Pip.",
-...     "What about elevensies?",
-... ]
->>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="pt")
->>> print(encoded_input)
-{'input_ids': tensor([[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0],
-                      [101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
-                      [101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]]),
- 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]),
- 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
-                           [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-                           [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}
-```
-</pt>
-<tf>
-```py
->>> batch_sentences = [
-...     "But what about second breakfast?",
-...     "Don't think he knows about second breakfast, Pip.",
-...     "What about elevensies?",
-... ]
->>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="tf")
->>> print(encoded_input)
-{'input_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
-array([[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0],
-       [101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
-       [101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]],
-      dtype=int32)>,
- 'token_type_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
-array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>,
- 'attention_mask': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
-array([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
-       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-       [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>}
-```
-</tf>
-</frameworkcontent>
-
-## 오디오[[audio]]
-
-오디오 작업은 모델에 맞는 데이터 세트를 준비하기 위해 [특성 추출기](main_classes/feature_extractor)가 필요합니다. 특성 추출기는 원시 오디오 데이터에서 특성를 추출하고 이를 텐서로 변환하는 것이 목적입니다.
-
-오디오 데이터 세트에 특성 추출기를 사용하는 방법을 보기 위해 [MInDS-14](https://huggingface.co/datasets/PolyAI/minds14) 데이터 세트를 가져오세요. (데이터 세트를 가져오는 방법은 🤗 [데이터 세트 튜토리얼](https://huggingface.co/docs/datasets/load_hub)에서 자세히 설명하고 있습니다.)
-
-```py
->>> from datasets import load_dataset, Audio
-
->>> dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")
-```
-
-`audio` 열의 첫 번째 요소에 접근하여 입력을 살펴보세요. `audio` 열을 호출하면 오디오 파일을 자동으로 가져오고 리샘플링합니다.
-
-```py
->>> dataset[0]["audio"]
-{'array': array([ 0.        ,  0.00024414, -0.00024414, ..., -0.00024414,
-         0.        ,  0.        ], dtype=float32),
- 'path': '/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav',
- 'sampling_rate': 8000}
-```
-
-이렇게 하면 세 가지 항목이 반환됩니다:
-
-* `array`는 1D 배열로 가져와서 (필요한 경우) 리샘플링된 음성 신호입니다.
-* `path`는 오디오 파일의 위치를 가리킵니다.
-* `sampling_rate`는 음성 신호에서 초당 측정되는 데이터 포인트 수를 나타냅니다.
-
-이 튜토리얼에서는 [Wav2Vec2](https://huggingface.co/facebook/wav2vec2-base) 모델을 사용합니다. 모델 카드를 보면 Wav2Vec2가 16kHz 샘플링된 음성 오디오를 기반으로 사전훈련된 것을 알 수 있습니다.
-모델을 사전훈련하는 데 사용된 데이터 세트의 샘플링 레이트와 오디오 데이터의 샘플링 레이트가 일치해야 합니다. 데이터의 샘플링 레이트가 다르면 데이터를 리샘플링해야 합니다.
-
-1. 🤗 Datasets의 [`~datasets.Dataset.cast_column`] 메소드를 사용하여 샘플링 레이트를 16kHz로 업샘플링하세요:
-
-```py
->>> dataset = dataset.cast_column("audio", Audio(sampling_rate=16_000))
-```
-
-2. 오디오 파일을 리샘플링하기 위해 `audio` 열을 다시 호출합니다:
-
-```py
->>> dataset[0]["audio"]
-{'array': array([ 2.3443763e-05,  2.1729663e-04,  2.2145823e-04, ...,
-         3.8356509e-05, -7.3497440e-06, -2.1754686e-05], dtype=float32),
- 'path': '/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav',
- 'sampling_rate': 16000}
-```
-
-다음으로, 입력을 정규화하고 패딩할 특성 추출기를 가져오세요. 텍스트 데이터의 경우, 더 짧은 시퀀스에 대해 `0`이 추가됩니다. 오디오 데이터에도 같은 개념이 적용됩니다.
-특성 추출기는 배열에 `0`(묵음으로 해석)을 추가합니다.
-
-[`AutoFeatureExtractor.from_pretrained`]를 사용하여 특성 추출기를 가져오세요:
-
-```py
->>> from transformers import AutoFeatureExtractor
-
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base")
-```
-
-오디오 `array`를 특성 추출기에 전달하세요. 또한, 발생할 수 있는 조용한 오류(silent errors)를 더 잘 디버깅할 수 있도록 특성 추출기에 `sampling_rate` 인수를 추가하는 것을 권장합니다.
-
-```py
->>> audio_input = [dataset[0]["audio"]["array"]]
->>> feature_extractor(audio_input, sampling_rate=16000)
-{'input_values': [array([ 3.8106556e-04,  2.7506407e-03,  2.8015103e-03, ...,
-        5.6335266e-04,  4.6588284e-06, -1.7142107e-04], dtype=float32)]}
-```
-
-토크나이저와 마찬가지로 배치 내에서 가변적인 시퀀스를 처리하기 위해 패딩 또는 잘라내기를 적용할 수 있습니다. 이 두 개의 오디오 샘플의 시퀀스 길이를 확인해보세요:
-
-```py
->>> dataset[0]["audio"]["array"].shape
-(173398,)
-
->>> dataset[1]["audio"]["array"].shape
-(106496,)
-```
-
-오디오 샘플의 길이가 동일하도록 데이터 세트를 전처리하는 함수를 만드세요. 최대 샘플 길이를 지정하면 특성 추출기가 해당 길이에 맞춰 시퀀스를 패딩하거나 잘라냅니다:
-
-```py
->>> def preprocess_function(examples):
-...     audio_arrays = [x["array"] for x in examples["audio"]]
-...     inputs = feature_extractor(
-...         audio_arrays,
-...         sampling_rate=16000,
-...         padding=True,
-...         max_length=100000,
-...         truncation=True,
-...     )
-...     return inputs
-```
-
-`preprocess_function`을 데이터 세트의 처음 예시 몇 개에 적용해보세요:
-
-```py
->>> processed_dataset = preprocess_function(dataset[:5])
-```
-
-이제 샘플 길이가 모두 같고 지정된 최대 길이에 맞게 되었습니다. 드디어 전처리된 데이터 세트를 모델에 전달할 수 있습니다!
-
-```py
->>> processed_dataset["input_values"][0].shape
-(100000,)
-
->>> processed_dataset["input_values"][1].shape
-(100000,)
-```
-
-## 컴퓨터 비전[[computer-vision]]
-
-컴퓨터 비전 작업의 경우, 모델에 대한 데이터 세트를 준비하기 위해 [이미지 프로세서](main_classes/image_processor)가 필요합니다.
-이미지 전처리는 이미지를 모델이 예상하는 입력으로 변환하는 여러 단계로 이루어집니다.
-이러한 단계에는 크기 조정, 정규화, 색상 채널 보정, 이미지의 텐서 변환 등이 포함됩니다.
-
-<Tip>
-
-이미지 전처리는 이미지 증강 기법을 몇 가지 적용한 뒤에 할 수도 있습니다.
-이미지 전처리 및 이미지 증강은 모두 이미지 데이터를 변형하지만, 서로 다른 목적을 가지고 있습니다:
-
-* 이미지 증강은 과적합(over-fitting)을 방지하고 모델의 견고함(resiliency)을 높이는 데 도움이 되는 방식으로 이미지를 수정합니다.
-밝기와 색상 조정, 자르기, 회전, 크기 조정, 확대/축소 등 다양한 방법으로 데이터를 증강할 수 있습니다.
-그러나 증강으로 이미지의 의미가 바뀌지 않도록 주의해야 합니다.
-* 이미지 전처리는 이미지가 모델이 예상하는 입력 형식과 일치하도록 보장합니다.
-컴퓨터 비전 모델을 미세 조정할 때 이미지는 모델이 초기에 훈련될 때와 정확히 같은 방식으로 전처리되어야 합니다.
-
-이미지 증강에는 원하는 라이브러리를 무엇이든 사용할 수 있습니다. 이미지 전처리에는 모델과 연결된 `ImageProcessor`를 사용합니다.
-
-</Tip>
-
-[food101](https://huggingface.co/datasets/food101) 데이터 세트를 가져와서 컴퓨터 비전 데이터 세트에서 이미지 프로세서를 어떻게 사용하는지 알아보세요.
-데이터 세트를 불러오는 방법은 🤗 [데이터 세트 튜토리얼](https://huggingface.co/docs/datasets/load_hub)을 참고하세요.
-
-<Tip>
-
-데이터 세트가 상당히 크기 때문에 🤗 Datasets의 `split` 매개변수를 사용하여 훈련 세트에서 작은 샘플만 가져오세요!
-
-</Tip>
-
-```py
->>> from datasets import load_dataset
-
->>> dataset = load_dataset("food101", split="train[:100]")
-```
-
-다음으로, 🤗 Datasets의 [`image`](https://huggingface.co/docs/datasets/package_reference/main_classes?highlight=image#datasets.Image)로 이미지를 확인해보세요:
-
-```py
->>> dataset[0]["image"]
-```
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/vision-preprocess-tutorial.png"/>
-</div>
-
-[`AutoImageProcessor.from_pretrained`]로 이미지 프로세서를 가져오세요:
-
-```py
->>> from transformers import AutoImageProcessor
-
->>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
-```
-
-먼저 이미지 증강 단계를 추가해 봅시다. 아무 라이브러리나 사용해도 괜찮지만, 이번 튜토리얼에서는 torchvision의 [`transforms`](https://pytorch.org/vision/stable/transforms.html) 모듈을 사용하겠습니다.
-다른 데이터 증강 라이브러리를 사용해보고 싶다면, [Albumentations](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb) 또는 [Kornia notebooks](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)에서 어떻게 사용하는지 배울 수 있습니다.
-
-1. [`Compose`](https://pytorch.org/vision/master/generated/torchvision.transforms.Compose.html)로  [`RandomResizedCrop`](https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html)와 [`ColorJitter`](https://pytorch.org/vision/main/generated/torchvision.transforms.ColorJitter.html) 등 변환을 몇 가지 연결하세요.
-참고로 크기 조정에 필요한 이미지의 크기 요구사항은 `image_processor`에서 가져올 수 있습니다.
-일부 모델은 정확한 높이와 너비를 요구하지만, 제일 짧은 변의 길이(`shortest_edge`)만 정의된 모델도 있습니다.
-
-```py
->>> from torchvision.transforms import RandomResizedCrop, ColorJitter, Compose
-
->>> size = (
-...     image_processor.size["shortest_edge"]
-...     if "shortest_edge" in image_processor.size
-...     else (image_processor.size["height"], image_processor.size["width"])
-... )
-
->>> _transforms = Compose([RandomResizedCrop(size), ColorJitter(brightness=0.5, hue=0.5)])
-```
-
-2. 모델은 입력으로 [`pixel_values`](model_doc/visionencoderdecoder#transformers.VisionEncoderDecoderModel.forward.pixel_values)를 받습니다.
-`ImageProcessor`는 이미지 정규화 및 적절한 텐서 생성을 처리할 수 있습니다.
-배치 이미지에 대한 이미지 증강 및 이미지 전처리를 결합하고 `pixel_values`를 생성하는 함수를 만듭니다:
-
-```py
->>> def transforms(examples):
-...     images = [_transforms(img.convert("RGB")) for img in examples["image"]]
-...     examples["pixel_values"] = image_processor(images, do_resize=False, return_tensors="pt")["pixel_values"]
-...     return examples
-```
-
-<Tip>
-
-위의 예에서는 이미지 증강 중에 이미지 크기를 조정했기 때문에 `do_resize=False`로 설정하고, 해당 `image_processor`에서 `size` 속성을 활용했습니다.
-이미지 증강 중에 이미지 크기를 조정하지 않은 경우 이 매개변수를 생략하세요.
-기본적으로는 `ImageProcessor`가 크기 조정을 처리합니다.
-
-증강 변환 과정에서 이미지를 정규화하려면 `image_processor.image_mean` 및 `image_processor.image_std` 값을 사용하세요.
-
-</Tip>
-
-3. 🤗 Datasets의 [`set_transform`](https://huggingface.co/docs/datasets/process#format-transform)를 사용하여 실시간으로 변환을 적용합니다:
-
-```py
->>> dataset.set_transform(transforms)
-```
-
-4. 이제 이미지에 접근하면 이미지 프로세서가 `pixel_values`를 추가한 것을 알 수 있습니다.
-드디어 처리된 데이터 세트를 모델에 전달할 수 있습니다!
-
-```py
->>> dataset[0].keys()
-```
-
-다음은 변형이 적용된 후의 이미지입니다. 이미지가 무작위로 잘려나갔고 색상 속성이 다릅니다.
-
-```py
->>> import numpy as np
->>> import matplotlib.pyplot as plt
-
->>> img = dataset[0]["pixel_values"]
->>> plt.imshow(img.permute(1, 2, 0))
-```
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/preprocessed_image.png"/>
-</div>
-
-<Tip>
-
-`ImageProcessor`는 객체 감지, 시맨틱 세그멘테이션(semantic segmentation), 인스턴스 세그멘테이션(instance segmentation), 파놉틱 세그멘테이션(panoptic segmentation)과 같은 작업에 대한 후처리 방법을 제공합니다.
-이러한 방법은 모델의 원시 출력을 경계 상자나 세그멘테이션 맵과 같은 의미 있는 예측으로 변환해줍니다.
-
-</Tip>
-
-### 패딩[[pad]]
-
-예를 들어, [DETR](./model_doc/detr)와 같은 경우에는 모델이 훈련할 때 크기 조정 증강을 적용합니다.
-이로 인해 배치 내 이미지 크기가 달라질 수 있습니다.
-[`DetrImageProcessor`]의 [`DetrImageProcessor.pad`]를 사용하고 사용자 정의 `collate_fn`을 정의해서 배치 이미지를 처리할 수 있습니다.
-
-```py
->>> def collate_fn(batch):
-...     pixel_values = [item["pixel_values"] for item in batch]
-...     encoding = image_processor.pad(pixel_values, return_tensors="pt")
-...     labels = [item["labels"] for item in batch]
-...     batch = {}
-...     batch["pixel_values"] = encoding["pixel_values"]
-...     batch["pixel_mask"] = encoding["pixel_mask"]
-...     batch["labels"] = labels
-...     return batch
-```
-
-## 멀티모달[[multimodal]]
-
-멀티모달 입력이 필요한 작업의 경우, 모델에 데이터 세트를 준비하기 위한 [프로세서](main_classes/processors)가 필요합니다.
-프로세서는 토크나이저와 특성 추출기와 같은 두 가지 처리 객체를 결합합니다.
-
-[LJ Speech](https://huggingface.co/datasets/lj_speech) 데이터 세트를 가져와서 자동 음성 인식(ASR)을 위한 프로세서를 사용하는 방법을 확인하세요.
-(데이터 세트를 가져오는 방법에 대한 자세한 내용은 🤗 [데이터 세트 튜토리얼](https://huggingface.co/docs/datasets/load_hub)에서 볼 수 있습니다.)
-
-```py
->>> from datasets import load_dataset
-
->>> lj_speech = load_dataset("lj_speech", split="train")
-```
-
-자동 음성 인식(ASR)에서는 `audio`와 `text`에만 집중하면 되므로, 다른 열들은 제거할 수 있습니다:
-
-```py
->>> lj_speech = lj_speech.map(remove_columns=["file", "id", "normalized_text"])
-```
-
-이제 `audio`와 `text`열을 살펴보세요:
-
-```py
->>> lj_speech[0]["audio"]
-{'array': array([-7.3242188e-04, -7.6293945e-04, -6.4086914e-04, ...,
-         7.3242188e-04,  2.1362305e-04,  6.1035156e-05], dtype=float32),
- 'path': '/root/.cache/huggingface/datasets/downloads/extracted/917ece08c95cf0c4115e45294e3cd0dee724a1165b7fc11798369308a465bd26/LJSpeech-1.1/wavs/LJ001-0001.wav',
- 'sampling_rate': 22050}
-
->>> lj_speech[0]["text"]
-'Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition'
-```
-
-기존에 사전훈련된 모델에서 사용된 데이터 세트와 새로운 오디오 데이터 세트의 샘플링 레이트를 일치시키기 위해 오디오 데이터 세트의 샘플링 레이트를 [리샘플링](preprocessing#audio)해야 합니다!
-
-```py
->>> lj_speech = lj_speech.cast_column("audio", Audio(sampling_rate=16_000))
-```
-
-[`AutoProcessor.from_pretrained`]로 프로세서를 가져오세요:
-
-```py
->>> from transformers import AutoProcessor
-
->>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
-```
-
-1. `array`에 들어 있는 오디오 데이터를 `input_values`로 변환하고 `text`를 토큰화하여 `labels`로 변환하는 함수를 만듭니다.
-모델의 입력은 다음과 같습니다:
-
-```py
->>> def prepare_dataset(example):
-...     audio = example["audio"]
-
-...     example.update(processor(audio=audio["array"], text=example["text"], sampling_rate=16000))
-
-...     return example
-```
-
-2. 샘플을 `prepare_dataset` 함수에 적용하세요:
-
-```py
->>> prepare_dataset(lj_speech[0])
-```
-
-이제 프로세서가 `input_values`와 `labels`를 추가하고, 샘플링 레이트도 올바르게 16kHz로 다운샘플링했습니다.
-드디어 처리된 데이터 세트를 모델에 전달할 수 있습니다!
--- a/docs/source/ko/run_scripts.md
+++ b/docs/source/ko/run_scripts.md
@ -347,7 +347,7 @@ python examples/pytorch/summarization/run_summarization.py
 모든 스크립트는 최종 모델을 [Model Hub](https://huggingface.co/models)에 업로드할 수 있습니다.
 시작하기 전에 Hugging Face에 로그인했는지 확인하세요:
 ```bash
-huggingface-cli login
+hf auth login
 ```

 그런 다음 스크립트에 `push_to_hub` 인수를 추가합니다.
--- a/docs/source/ko/sagemaker.md
+++ b/docs/source/ko/sagemaker.md
@ -1,28 +0,0 @@
-<!---
-Copyright 2020 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# Amazon SageMaker에서 학습 실행하기[[run-training-on-amazon-sagemaker]]
-
-문서가 [hf.co/docs/sagemaker](https://huggingface.co/docs/sagemaker)로 이동되었습니다. 이 페이지는 `transformers` 5.0 에서 삭제될 예정입니다. 
-
-### 목차[[table-of-content]]
-
- [Train Hugging Face models on Amazon SageMaker with the SageMaker Python SDK](https://huggingface.co/docs/sagemaker/train)
- [Deploy Hugging Face models to Amazon SageMaker with the SageMaker Python SDK](https://huggingface.co/docs/sagemaker/inference)
--- a/docs/source/ko/task_summary.md
+++ b/docs/source/ko/task_summary.md
@ -1,341 +0,0 @@
-<!--Copyright 2020 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# 🤗 Transformers로 할 수 있는 것[[what__transformers_can_do]]
-
-🤗 Transformers는 자연어처리(NLP), 컴퓨터 비전, 오디오 및 음성 처리 작업에 대한 사전훈련된 최첨단 모델 라이브러리입니다. 
-이 라이브러리는 트랜스포머 모델뿐만 아니라 컴퓨터 비전 작업을 위한 현대적인 합성곱 신경망과 같은 트랜스포머가 아닌 모델도 포함하고 있습니다. 
-
-스마트폰, 앱, 텔레비전과 같은 오늘날 가장 인기 있는 소비자 제품을 살펴보면, 딥러닝 기술이 그 뒤에 사용되고 있을 확률이 높습니다. 
-스마트폰으로 촬영한 사진에서 배경 객체를 제거하고 싶다면 어떻게 할까요? 이는 파놉틱 세그멘테이션 작업의 예입니다(아직 이게 무엇인지 모른다면, 다음 섹션에서 설명하겠습니다!).
-
-이 페이지는 다양한 음성 및 오디오, 컴퓨터 비전, NLP 작업을 🤗 Transformers 라이브러리를 활용하여 다루는 간단한 예제를 3줄의 코드로 제공합니다. 
-
-## 오디오[[audio]]
-
-
-음성 및 오디오 처리 작업은 다른 모달리티와 약간 다릅니다. 이는 주로 오디오가 연속적인 신호로 입력되기 때문입니다. 
-텍스트와 달리 원본 오디오 파형(waveform)은 문장이 단어로 나눠지는 것처럼 깔끔하게 이산적인 묶음으로 나눌 수 없습니다. 
-이를 극복하기 위해 원본 오디오 신호는 일정한 간격으로 샘플링됩니다. 해당 간격 내에서 더 많은 샘플을 취할 경우 샘플링률이 높아지며, 오디오는 원본 오디오 소스에 더 가까워집니다.
-
-과거의 접근 방식은 오디오에서 유용한 특징을 추출하기 위해 오디오를 전처리하는 것이었습니다. 
-하지만 현재는 원본 오디오 파형을 특성 인코더에 직접 넣어서 오디오 표현(representation)을 추출하는 것이 더 일반적입니다. 
-이렇게 하면 전처리 단계가 단순해지고 모델이 가장 중요한 특징을 학습할 수 있습니다.
-
-### 오디오 분류[[audio_classification]]
-
-
-오디오 분류는 오디오 데이터에 미리 정의된 클래스 집합의 레이블을 지정하는 작업입니다. 이는 많은 구체적인 응용 프로그램을 포함한 넓은 범주입니다.
-
-일부 예시는 다음과 같습니다:
-
-* 음향 장면 분류: 오디오에 장면 레이블("사무실", "해변", "경기장")을 지정합니다.
-* 음향 이벤트 감지: 오디오에 소리 이벤트 레이블("차 경적", "고래 울음소리", "유리 파손")을 지정합니다.
-* 태깅: 여러 가지 소리(새 지저귐, 회의에서의 화자 식별)가 포함된 오디오에 레이블을 지정합니다.
-* 음악 분류: 음악에 장르 레이블("메탈", "힙합", "컨트리")을 지정합니다.
-
-```py
->>> from transformers import pipeline
-
->>> classifier = pipeline(task="audio-classification", model="superb/hubert-base-superb-er")
->>> preds = classifier("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
->>> preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
->>> preds
-[{'score': 0.4532, 'label': 'hap'},
- {'score': 0.3622, 'label': 'sad'},
- {'score': 0.0943, 'label': 'neu'},
- {'score': 0.0903, 'label': 'ang'}]
-```
-
-### 자동 음성 인식[[automatic_speech_recognition]]
-
-
-자동 음성 인식(ASR)은 음성을 텍스트로 변환하는 작업입니다. 
-음성은 인간의 자연스러운 의사소통 형태이기 때문에 ASR은 가장 일반적인 오디오 작업 중 하나입니다. 
-오늘날 ASR 시스템은 스피커, 전화 및 자동차와 같은 "스마트" 기술 제품에 내장되어 있습니다. 
-우리는 가상 비서에게 음악 재생, 알림 설정 및 날씨 정보를 요청할 수 있습니다.
-
-하지만 트랜스포머 아키텍처가 해결하는 데 도움을 준 핵심 도전 과제 중 하나는 양이 데이터 양이 적은 언어(low-resource language)에 대한 것입니다. 대량의 음성 데이터로 사전 훈련한 후 데이터 양이 적은 언어에서 레이블이 지정된 음성 데이터 1시간만으로 모델을 미세 조정하면 이전의 100배 많은 레이블이 지정된 데이터로 훈련된 ASR 시스템보다 훨씬 더 높은 품질의 결과를 얻을 수 있습니다. 
-```py
->>> from transformers import pipeline
-
->>> transcriber = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")
->>> transcriber("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
-{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}
-```
-
-## 컴퓨터 비전[[computer_vision]]
-
-컴퓨터 비전 작업 중 가장 초기의 성공적인 작업 중 하나는 [합성곱 신경망(CNN)](glossary#convolution)을 사용하여 우편번호 숫자 이미지를 인식하는 것이었습니다. 이미지는 픽셀로 구성되어 있으며 각 픽셀은 숫자 값으로 표현됩니다. 이로써 이미지를 픽셀 값의 행렬로 나타내는 것이 쉬워집니다. 특정한 픽셀 값의 조합은 이미지의 색상을 의미합니다.
-
-컴퓨터 비전 작업은 일반적으로 다음 두 가지 방법으로 접근 가능합니다:
-
-1. 합성곱을 사용하여 이미지의 낮은 수준 특징에서 높은 수준의 추상적인 요소까지 계층적으로 학습합니다.
-
-2. 이미지를 패치로 나누고 트랜스포머를 사용하여 점진적으로 각 이미지 패치가 서로 어떠한 방식으로 연관되어 이미지를 형성하는지 학습합니다. `CNN`에서 선호하는 상향식 접근법과는 달리, 이 방식은 흐릿한 이미지로 초안을 그리고 점진적으로 선명한 이미지로 만들어가는 것과 유사합니다.
-
-### 이미지 분류[[image_classification]]
-
-
-이미지 분류는 한 개의 전체 이미지에 미리 정의된 클래스 집합의 레이블을 지정하는 작업입니다. 
-
-대부분의 분류 작업과 마찬가지로, 이미지 분류에는 다양한 실용적인 용도가 있으며, 일부 예시는 다음과 같습니다:
-
-
-* 의료: 질병을 감지하거나 환자 건강을 모니터링하기 위해 의료 이미지에 레이블을 지정합니다.
-* 환경: 위성 이미지를 분류하여 산림 벌채를 감시하고 야생 지역 관리를 위한 정보를 제공하거나 산불을 감지합니다. 
-* 농업: 작물 이미지를 분류하여 식물 건강을 확인하거나 위성 이미지를 분류하여 토지 이용 관찰에 사용합니다.
-* 생태학: 동물이나 식물 종 이미지를 분류하여 야생 동물 개체군을 조사하거나 멸종 위기에 처한 종을 추적합니다.
-
-```py
->>> from transformers import pipeline
-
->>> classifier = pipeline(task="image-classification")
->>> preds = classifier(
-...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
-... )
->>> preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
->>> print(*preds, sep="\n")
-{'score': 0.4335, 'label': 'lynx, catamount'}
-{'score': 0.0348, 'label': 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor'}
-{'score': 0.0324, 'label': 'snow leopard, ounce, Panthera uncia'}
-{'score': 0.0239, 'label': 'Egyptian cat'}
-{'score': 0.0229, 'label': 'tiger cat'}
-```
-
-### 객체 탐지[[object_detection]]
-
-
-이미지 분류와 달리 객체 탐지는 이미지 내에서 여러 객체를 식별하고 바운딩 박스로 정의된 객체의 위치를 파악합니다. 
-
-객체 탐지의 몇 가지 응용 예시는 다음과 같습니다:
-
-* 자율 주행 차량: 다른 차량, 보행자 및 신호등과 같은 일상적인 교통 객체를 감지합니다.
-* 원격 감지: 재난 모니터링, 도시 계획 및 기상 예측 등을 수행합니다.
-* 결함 탐지: 건물의 균열이나 구조적 손상, 제조 결함 등을 탐지합니다.
-
-
-```py
->>> from transformers import pipeline
-
->>> detector = pipeline(task="object-detection")
->>> preds = detector(
-...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
-... )
->>> preds = [{"score": round(pred["score"], 4), "label": pred["label"], "box": pred["box"]} for pred in preds]
->>> preds
-[{'score': 0.9865,
-  'label': 'cat',
-  'box': {'xmin': 178, 'ymin': 154, 'xmax': 882, 'ymax': 598}}]
-```
-
-### 이미지 분할[[image_segmentation]]
-
-
-이미지 분할은 픽셀 차원의 작업으로, 이미지 내의 모든 픽셀을 클래스에 할당합니다. 이는 객체 탐지와 다릅니다. 객체 탐지는 바운딩 박스를 사용하여 이미지 내의 객체를 레이블링하고 예측하는 반면, 분할은 더 세분화된 작업입니다. 분할은 픽셀 수준에서 객체를 감지할 수 있습니다. 
-
-이미지 분할에는 여러 유형이 있습니다:
-
-* 인스턴스 분할: 개체의 클래스를 레이블링하는 것 외에도, 개체의 각 구분된 인스턴스에도 레이블을 지정합니다 ("개-1", "개-2" 등).
-* 파놉틱 분할: 의미적 분할과 인스턴스 분할의 조합입니다. 각 픽셀을 의미적 클래스로 레이블링하는 **동시에** 개체의 각각 구분된 인스턴스로도 레이블을 지정합니다.
-
-분할 작업은 자율 주행 차량에서 유용하며, 주변 환경의 픽셀 수준 지도를 생성하여 보행자와 다른 차량 주변에서 안전하게 탐색할 수 있습니다. 또한 의료 영상에서도 유용합니다. 분할 작업이 픽셀 수준에서 객체를 감지할 수 있기 때문에 비정상적인 세포나 장기의 특징을 식별하는 데 도움이 될 수 있습니다. 이미지 분할은 의류 가상 시착이나 카메라를 통해 실제 세계에 가상 개체를 덧씌워 증강 현실 경험을 만드는 등 전자 상거래 분야에서도 사용될 수 있습니다.
-
-```py
->>> from transformers import pipeline
-
->>> segmenter = pipeline(task="image-segmentation")
->>> preds = segmenter(
-...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
-... )
->>> preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
->>> print(*preds, sep="\n")
-{'score': 0.9879, 'label': 'LABEL_184'}
-{'score': 0.9973, 'label': 'snow'}
-{'score': 0.9972, 'label': 'cat'}
-```
-
-### 깊이 추정[[depth_estimation]]
-
-깊이 추정은 카메라로부터 이미지 내부의 각 픽셀의 거리를 예측합니다. 이 컴퓨터 비전 작업은 특히 장면 이해와 재구성에 중요합니다. 예를 들어, 자율 주행 차량은 보행자, 교통 표지판 및 다른 차량과 같은 객체와의 거리를 이해하여 장애물과 충돌을 피해야 합니다. 깊이 정보는 또한 2D 이미지에서 3D 표현을 구성하는 데 도움이 되며 생물학적 구조나 건물의 고품질 3D 표현을 생성하는 데 사용될 수 있습니다.
-
-깊이 추정에는 두 가지 접근 방식이 있습니다:
-
-* 스테레오: 약간 다른 각도에서 촬영된 동일한 이미지 두 장을 비교하여 깊이를 추정합니다.
-* 단안: 단일 이미지에서 깊이를 추정합니다.
-
-
-```py
->>> from transformers import pipeline
-
->>> depth_estimator = pipeline(task="depth-estimation")
->>> preds = depth_estimator(
-...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
-... )
-```
-
-## 자연어처리[[natural_language_processing]]
-
-텍스트는 인간이 의사 소통하는 자연스러운 방식 중 하나이기 때문에 자연어처리 역시 가장 일반적인 작업 유형 중 하나입니다. 모델이 인식하는 형식으로 텍스트를 변환하려면 토큰화해야 합니다. 이는 텍스트 시퀀스를 개별 단어 또는 하위 단어(토큰)로 분할한 다음 이러한 토큰을 숫자로 변환하는 것을 의미합니다. 결과적으로 텍스트 시퀀스를 숫자 시퀀스로 표현할 수 있으며, 숫자 시퀀스를 다양한 자연어처리 작업을 해결하기 위한 모델에 입력할 수 있습니다!
-
-### 텍스트 분류[[text_classification]]
-
-다른 모달리티에서의 분류 작업과 마찬가지로 텍스트 분류는 미리 정의된 클래스 집합에서 텍스트 시퀀스(문장 수준, 단락 또는 문서 등)에 레이블을 지정합니다. 텍스트 분류에는 다양한 실용적인 응용 사례가 있으며, 일부 예시는 다음과 같습니다:
-
-* 감성 분석: 텍스트를 `긍정` 또는 `부정`과 같은 어떤 극성에 따라 레이블링하여 정치, 금융, 마케팅과 같은 분야에서 의사 결정에 정보를 제공하고 지원할 수 있습니다.
-* 콘텐츠 분류: 텍스트를 주제에 따라 레이블링(날씨, 스포츠, 금융 등)하여 뉴스 및 소셜 미디어 피드에서 정보를 구성하고 필터링하는 데 도움이 될 수 있습니다.
-
-```py
->>> from transformers import pipeline
-
->>> classifier = pipeline(task="sentiment-analysis")
->>> preds = classifier("Hugging Face is the best thing since sliced bread!")
->>> preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
->>> preds
-[{'score': 0.9991, 'label': 'POSITIVE'}]
-```
-
-### 토큰 분류[[token_classification]]
-
-모든 자연어처리 작업에서는 텍스트가 개별 단어나 하위 단어로 분리되어 전처리됩니다. 분리된 단어를 [토큰](/glossary#token)이라고 합니다. 토큰 분류는 각 토큰에 미리 정의된 클래스 집합의 레이블을 할당합니다.
-
-토큰 분류의 두 가지 일반적인 유형은 다음과 같습니다:
-
-* 개체명 인식 (NER): 토큰을 조직, 인물, 위치 또는 날짜와 같은 개체 범주에 따라 레이블링합니다. NER은 특히 유전체학적인 환경에서 유전자, 단백질 및 약물 이름에 레이블을 지정하는 데 널리 사용됩니다.
-* 품사 태깅 (POS): 명사, 동사, 형용사와 같은 품사에 따라 토큰에 레이블을 할당합니다. POS는 번역 시스템이 동일한 단어가 문법적으로 어떻게 다른지 이해하는 데 도움이 됩니다 (명사로 사용되는 "bank(은행)"과 동사로 사용되는 "bank(예금을 예치하다)"과 같은 경우).
-
-
-```py
->>> from transformers import pipeline
-
->>> classifier = pipeline(task="ner")
->>> preds = classifier("Hugging Face is a French company based in New York City.")
->>> preds = [
-...     {
-...         "entity": pred["entity"],
-...         "score": round(pred["score"], 4),
-...         "index": pred["index"],
-...         "word": pred["word"],
-...         "start": pred["start"],
-...         "end": pred["end"],
-...     }
-...     for pred in preds
-... ]
->>> print(*preds, sep="\n")
-{'entity': 'I-ORG', 'score': 0.9968, 'index': 1, 'word': 'Hu', 'start': 0, 'end': 2}
-{'entity': 'I-ORG', 'score': 0.9293, 'index': 2, 'word': '##gging', 'start': 2, 'end': 7}
-{'entity': 'I-ORG', 'score': 0.9763, 'index': 3, 'word': 'Face', 'start': 8, 'end': 12}
-{'entity': 'I-MISC', 'score': 0.9983, 'index': 6, 'word': 'French', 'start': 18, 'end': 24}
-{'entity': 'I-LOC', 'score': 0.999, 'index': 10, 'word': 'New', 'start': 42, 'end': 45}
-{'entity': 'I-LOC', 'score': 0.9987, 'index': 11, 'word': 'York', 'start': 46, 'end': 50}
-{'entity': 'I-LOC', 'score': 0.9992, 'index': 12, 'word': 'City', 'start': 51, 'end': 55}
-```
-
-### 질의응답[[question_answering]]
-
-질의응답은 또 하나의 토큰 차원의 작업으로, 문맥이 있을 때(개방형 도메인)와 문맥이 없을 때(폐쇄형 도메인) 질문에 대한 답변을 반환합니다. 이 작업은 가상 비서에게 식당이 영업 중인지와 같은 질문을 할 때마다 발생할 수 있습니다. 고객 지원 또는 기술 지원을 제공하거나 검색 엔진이 요청한 정보를 검색하는 데 도움을 줄 수 있습니다.
-
-질문 답변에는 일반적으로 두 가지 유형이 있습니다:
-
-* 추출형: 질문과 문맥이 주어졌을 때, 모델이 주어진 문맥의 일부에서 가져온 텍스트의 범위를 답변으로 합니다.
-* 생성형: 질문과 문맥이 주어졌을 때, 주어진 문맥을 통해 답변을 생성합니다. 이 접근 방식은 [`QuestionAnsweringPipeline`] 대신 [`Text2TextGenerationPipeline`]을 통해 처리됩니다.
-
-```py
->>> from transformers import pipeline
-
->>> question_answerer = pipeline(task="question-answering")
->>> preds = question_answerer(
-...     question="What is the name of the repository?",
-...     context="The name of the repository is huggingface/transformers",
-... )
->>> print(
-...     f"score: {round(preds['score'], 4)}, start: {preds['start']}, end: {preds['end']}, answer: {preds['answer']}"
-... )
-score: 0.9327, start: 30, end: 54, answer: huggingface/transformers
-```
-
-### 요약[[summarization]]
-
-요약은 원본 문서의 의미를 최대한 보존하면서 긴 문서를 짧은 문서로 만드는 작업입니다. 요약은 `sequence-to-sequence` 작업입니다. 입력보다 짧은 텍스트 시퀀스를 출력합니다. 요약 작업은 독자가 장문 문서들의 주요 포인트를 빠르게 이해하는 데 도움을 줄 수 있습니다. 입법안, 법률 및 금융 문서, 특허 및 과학 논문은 요약 작업이 독자의 시간을 절약하고 독서 보조 도구로 사용될 수 있는 몇 가지 예시입니다.
-
-질문 답변과 마찬가지로 요약에는 두 가지 유형이 있습니다:
-
-* 추출형: 원본 텍스트에서 가장 중요한 문장을 식별하고 추출합니다.
-* 생성형: 원본 텍스트에서 목표 요약을 생성합니다. 입력 문서에 없는 새로운 단어를 포함할 수도 있습니다. [`SummarizationPipeline`]은 생성형 접근 방식을 사용합니다.
-
-```py
->>> from transformers import pipeline
-
->>> summarizer = pipeline(task="summarization")
->>> summarizer(
-...     "In this work, we presented the Transformer, the first sequence transduction model based entirely on attention, replacing the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention. For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers. On both WMT 2014 English-to-German and WMT 2014 English-to-French translation tasks, we achieve a new state of the art. In the former task our best model outperforms even all previously reported ensembles."
-... )
-[{'summary_text': ' The Transformer is the first sequence transduction model based entirely on attention . It replaces the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention . For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers .'}]
-```
-
-### 번역[[translation]]
-
-번역은 한 언어로 된 텍스트 시퀀스를 다른 언어로 변환하는 작업입니다. 이는 서로 다른 배경을 가진 사람들이 서로 소통하는 데 도움을 주는 중요한 역할을 합니다. 더 넓은 대중에게 콘텐츠를 번역하여 전달하거나, 새로운 언어를 배우는 데 도움이 되는 학습 도구가 될 수도 있습니다. 요약과 마찬가지로, 번역은 `sequence-to-sequence` 작업입니다. 즉, 모델은 입력 시퀀스를 받아서 출력이 되는 목표 시퀀스를 반환합니다.
-
-초기의 번역 모델은 대부분 단일 언어로 이루어져 있었지만, 최근에는 많은 언어 쌍 간에 번역을 수행할 수 있는 다중 언어 모델에 대한 관심이 높아지고 있습니다.
-
-```py
->>> from transformers import pipeline
-
->>> text = "translate English to French: Hugging Face is a community-based open-source platform for machine learning."
->>> translator = pipeline(task="translation", model="google-t5/t5-small")
->>> translator(text)
-[{'translation_text': "Hugging Face est une tribune communautaire de l'apprentissage des machines."}]
-```
-
-### 언어 모델링[[language_modeling]]
-
-언어 모델링은 텍스트 시퀀스에서 단어를 예측하는 작업입니다. 사전 훈련된 언어 모델은 많은 다른 하위 작업에 따라 미세 조정될 수 있기 때문에 매우 인기 있는 자연어처리 작업이 되었습니다. 최근에는 제로 샷(zero-shot) 또는 퓨 샷(few-shot) 학습이 가능한 대규모 언어 모델(Large Language Models, LLM)에 대한 많은 관심이 발생하고 있습니다. 이는 모델이 명시적으로 훈련되지 않은 작업도 해결할 수 있다는 것을 의미합니다! 언어 모델은 유창하고 설득력 있는 텍스트를 생성하는 데 사용될 수 있지만, 텍스트가 항상 정확하지는 않을 수 있으므로 주의가 필요합니다.
-
-언어 모델링에는 두 가지 유형이 있습니다:
-
-* 인과적 언어 모델링: 이 모델의 목적은 시퀀스에서 다음 토큰을 예측하는 것이며, 미래 토큰이 마스킹 됩니다.
-    ```py
-    >>> from transformers import pipeline
-
-    >>> prompt = "Hugging Face is a community-based open-source platform for machine learning."
-    >>> generator = pipeline(task="text-generation")
-    >>> generator(prompt)  # doctest: +SKIP
-    ```
-
-* 마스킹된 언어 모델링: 이 모델의 목적은 시퀀스 내의 마스킹된 토큰을 예측하는 것이며, 시퀀스 내의 모든 토큰에 대한 접근이 제공됩니다.
-    
-    ```py
-    >>> text = "Hugging Face is a community-based open-source <mask> for machine learning."
-    >>> fill_mask = pipeline(task="fill-mask")
-    >>> preds = fill_mask(text, top_k=1)
-    >>> preds = [
-    ...     {
-    ...         "score": round(pred["score"], 4),
-    ...         "token": pred["token"],
-    ...         "token_str": pred["token_str"],
-    ...         "sequence": pred["sequence"],
-    ...     }
-    ...     for pred in preds
-    ... ]
-    >>> preds
-    [{'score': 0.2236,
-      'token': 1761,
-      'token_str': ' platform',
-      'sequence': 'Hugging Face is a community-based open-source platform for machine learning.'}]
-    ```
-
-이 페이지를 통해 각 모달리티의 다양한 작업 유형과 각 작업의 실용적 중요성에 대해 추가적인 배경 정보를 얻으셨기를 바랍니다. 다음 [섹션](tasks_explained)에서는 🤗 Transformer가 이러한 작업을 해결하는 **방법**에 대해 알아보실 수 있습니다.
--- a/docs/source/ko/tasks_explained.md
+++ b/docs/source/ko/tasks_explained.md
@ -1,295 +0,0 @@
-<!--Copyright 2023 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# 🤗 Transformers로 작업을 해결하는 방법[[how-transformers-solve-tasks]]
-
-[🤗 Transformers로 할 수 있는 작업](task_summary)에서 자연어 처리(NLP), 음성 및 오디오, 컴퓨터 비전 작업 등의 중요한 응용을 배웠습니다. 이 페이지에서는 모델이 이러한 작업을 어떻게 해결하는지 자세히 살펴보고 내부에서 어떤 일이 일어나는지 설명합니다. 주어진 작업을 해결하는 많은 방법이 있으며, 일부 모델은 특정 기술을 구현하거나 심지어 새로운 방식으로 작업에 접근할 수도 있지만, Transformer 모델의 경우 일반적인 아이디어는 동일합니다. 유연한 아키텍처 덕분에 대부분의 모델은 인코더, 디코더 또는 인코더-디코더 구조의 변형입니다. Transformer 모델뿐만 아니라 우리의 라이브러리에는 오늘날 컴퓨터 비전 작업에 사용되는 몇 가지 합성곱 신경망(CNNs)도 있습니다. 또한, 우리는 현대 CNN의 작동 방식에 대해 설명할 것입니다.
-
-작업이 어떻게 해결되는지 설명하기 위해, 유용한 예측을 출력하고자 모델 내부에서 어떤 일이 일어나는지 살펴봅니다.
-
- 오디오 분류 및 자동 음성 인식(ASR)을 위한 [Wav2Vec2](model_doc/wav2vec2)
- 이미지 분류를 위한 [Vision Transformer (ViT)](model_doc/vit) 및 [ConvNeXT](model_doc/convnext)
- 객체 탐지를 위한 [DETR](model_doc/detr)
- 이미지 분할을 위한 [Mask2Former](model_doc/mask2former)
- 깊이 추정을 위한 [GLPN](model_doc/glpn)
- 인코더를 사용하는 텍스트 분류, 토큰 분류 및 질의응답과 같은 NLP 작업을 위한 [BERT](model_doc/bert)
- 디코더를 사용하는 텍스트 생성과 같은 NLP 작업을 위한 [GPT2](model_doc/gpt2)
- 인코더-디코더를 사용하는 요약 및 번역과 같은 NLP 작업을 위한 [BART](model_doc/bart)
-
-<Tip>
-
-더 나아가기 전에, 기존 Transformer 아키텍처에 대한 기본적인 지식을 숙지하는 것이 좋습니다. 인코더, 디코더 및 어텐션의 작동 방식을 알면 다양한 Transformer 모델이 어떻게 작동하는지 이해하는 데 도움이 됩니다. 시작 단계거나 복습이 필요한 경우, 더 많은 정보를 위해 [코스](https://huggingface.co/course/chapter1/4?fw=pt)를 확인하세요!
-
-</Tip>
-
-## 음성 및 오디오[[speech-and-audio]]
-
-[Wav2Vec2](model_doc/wav2vec2)는 레이블이 지정되지 않은 음성 데이터에 대해 사전훈련된 모델로, 오디오 분류 및 자동 음성 인식을 위해 레이블이 지정된 데이터로 미세 조정합니다.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/wav2vec2_architecture.png"/>
-</div>
-
-이 모델에는 4가지 주요 구성 요소가 있습니다:
-
-1. *특징 인코더(feature encoder)*는 원시 오디오 파형(raw audio waveform)을 가져와서 제로 평균 및 단위 분산으로 표준화하고, 각각 20ms 길이의 특징 벡터의 시퀀스로 변환합니다.
-
-2. 오디오 파형은 본질적으로 연속적이기 때문에, 텍스트 시퀀스를 단어로 나누는 것과 같이 분할할 수 없습니다. 그래서 *양자화 모듈(quantization module)*로 전달되는 특징 벡터는 이산형 음성 단위를 학습하기 위한 것입니다. 음성 단위는 *코드북(codebook)*(어휘집이라고 생각할 수 있습니다)이라는 코드단어(codewords) 콜렉션에서 선택됩니다. 코드북에서 연속적인 오디오 입력을 가장 잘 나타내는 벡터 또는 음성 단위가 선택되어 모델을 통과합니다.
-
-3. 특징 벡터의 절반은 무작위로 마스크가 적용되며, 마스크된 특징 벡터는 *상대적 위치 임베딩*을 추가하는 Transformer 인코더인 *문맥 네트워크(context network)*로 전달됩니다.
-
-4. 문맥 네트워크의 사전훈련 목표는 *대조적 작업(contrastive task)*입니다. 모델은 잘못된 예측 시퀀스에서 마스크된 예측의 실제 양자화된 음성 표현을 예측하며, 모델이 가장 유사한 컨텍스트 벡터와 양자화된 음성 단위(타겟 레이블)를 찾도록 권장합니다.
-
-이제 wav2vec2가 사전훈련되었으므로, 오디오 분류 또는 자동 음성 인식을 위해 데이터에 맞춰 미세 조정할 수 있습니다!
-
-### 오디오 분류[[audio-classification]]
-
-사전훈련된 모델을 오디오 분류에 사용하려면, 기본 Wav2Vec2 모델 상단에 시퀀스 분류 헤드를 추가하면 됩니다. 분류 헤드는 인코더의 은닉 상태(hidden states)를 받는 선형 레이어입니다. 은닉 상태는 각각 길이가 다른 오디오 프레임에서 학습된 특징을 나타냅니다. 고정 길이의 벡터 하나를 만들기 위해, 은닉 상태는 먼저 풀링되고, 클래스 레이블에 대한 로짓으로 변환됩니다. 가장 가능성이 높은 클래스를 찾기 위해 로짓과 타겟 사이의 교차 엔트로피 손실이 계산됩니다.
-
-오디오 분류에 직접 도전할 준비가 되셨나요? 완전한 [오디오 분류 가이드](tasks/audio_classification)를 확인하여 Wav2Vec2를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-### 자동 음성 인식[[automatic-speech-recognition]]
-
-사전훈련된 모델을 자동 음성 인식에 사용하려면, [연결주의적 시간 분류(CTC, Connectionist Temporal Classification)](glossary#connectionist-temporal-classification-ctc)를 위해 기본 Wav2Vec2 모델 상단에 언어 모델링 헤드를 추가합니다. 언어 모델링 헤드는 인코더의 은닉 상태를 받아서 로짓으로 변환합니다. 각 로짓은 토큰 클래스(토큰 수는 작업의 어휘에서 나타납니다)를 나타냅니다. CTC 손실은 텍스트로 디코딩된 토큰에서 가장 가능성이 높은 토큰 시퀀스를 찾기 위해 로짓과 타겟 사이에서 계산됩니다. 
-
-자동 음성 인식에 직접 도전할 준비가 되셨나요? 완전한 [자동 음성 인식 가이드](tasks/asr)를 확인하여 Wav2Vec2를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-## 컴퓨터 비전[[computer-vision]]
-
-컴퓨터 비전 작업에 접근하는 2가지 방법이 있습니다:
-
-1. 이미지를 패치 시퀀스로 분리하고 Transformer로 병렬 처리합니다.
-2. [ConvNeXT](model_doc/convnext)와 같은 현대 CNN을 사용합니다. 이는 합성곱 레이어를 기반으로 하지만 현대 네트워크 설계를 적용합니다.
-
-<Tip>
-
-세 번째 방법은 Transformer와 합성곱(예를 들어, [Convolutional Vision Transformer](model_doc/cvt) 또는 [LeViT](model_doc/levit))을 결합하는 것입니다. 우리는 살펴볼 두 가지 방법만 결합하기 때문에 여기서 이 방법을 다루지 않습니다.
-
-</Tip>
-
-ViT와 ConvNeXT는 일반적으로 이미지 분류에서 사용되지만, 물체 감지, 분할, 깊이 추정과 같은 다른 비전 작업에는 각각 DETR, Mask2Former, GLPN이 더 적합하므로 이러한 모델을 살펴보겠습니다.
-
-### 이미지 분류[[image-classification]]
-
-ViT와 ConvNeXT 모두 이미지 분류에 사용될 수 있지만, ViT는 어텐션 메커니즘을, ConvNeXT는 합성곱을 사용하는 것이 주된 차이입니다.
-
-#### Transformer[[transformer]]
-
-[ViT](model_doc/vit)은 합성곱을 전적으로 순수 Transformer 아키텍처로 대체합니다. 기존 Transformer에 익숙하다면, ViT를 이해하는 방법의 대부분을 이미 파악했다고 볼 수 있습니다.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/vit_architecture.jpg"/>
-</div>
-
-ViT가 도입한 주요 변경 사항은 이미지가 Transformer로 어떻게 전달되는지에 있습니다:
-
-1. 이미지는 서로 중첩되지 않는 정사각형 패치로 분할되고, 각 패치는 벡터 또는 *패치 임베딩(patch embedding)*으로 변환됩니다. 패치 임베딩은 적절한 입력 차원을 만드는 2D 합성곱 계층에서 생성됩니다(기본 Transformer의 경우 각 패치의 임베딩마다 768개의 값이 필요합니다). 224x224 픽셀 이미지가 있다면, 16x16 이미지 패치 196개로 분할할 수 있습니다. 텍스트가 단어로 토큰화되는 것처럼, 이미지도 패치 시퀀스로 "토큰화"됩니다.
-
-2. *학습 가능한 임베딩(learnable embedding)*(특수한 `[CLS]` 토큰)이 BERT와 같이 패치 임베딩의 시작 부분에 추가됩니다. `[CLS]` 토큰의 마지막 은닉 상태는 부착된 분류 헤드의 입력으로 사용되고, 다른 출력은 무시됩니다. 이 토큰은 모델이 이미지의 표현을 인코딩하는 방법을 학습하는 데 도움이 됩니다.
-
-3. 패치와 학습 가능한 임베딩에 마지막으로 추가할 것은 *위치 임베딩*입니다. 왜냐하면 모델은 이미지 패치의 순서를 모르기 때문입니다. 위치 임베딩도 학습 가능하며, 패치 임베딩과 동일한 크기를 가집니다. 최종적으로, 모든 임베딩이 Transformer 인코더에 전달됩니다.
-
-4. `[CLS]` 토큰을 포함한 출력은 다층 퍼셉트론 헤드(MLP)에 전달됩니다. ViT의 사전훈련 목표는 단순히 분류입니다. 다른 분류 헤드와 같이, MLP 헤드는 출력을 클래스 레이블에 대해 로짓으로 변환하고 교차 엔트로피 손실을 계산하여 가장 가능성이 높은 클래스를 찾습니다.
-
-이미지 분류에 직접 도전할 준비가 되셨나요? 완전한 [이미지 분류 가이드](tasks/image_classification)를 확인하여 ViT를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-#### CNN[[cnn]]
-
-<Tip>
-
-이 섹션에서는 합성곱에 대해 간략하게 설명합니다. 그러나 이미지의 모양과 크기가 어떻게 변화하는지에 대한 사전 이해가 있다면 도움이 될 것입니다. 합성곱에 익숙하지 않은 경우, fastai book의 [합성곱 신경망 챕터](https://github.com/fastai/fastbook/blob/master/13_convolutions.ipynb)를 확인하세요!
-
-</Tip>
-
-[ConvNeXT](model_doc/convnext)는 성능을 높이기 위해 새로운 현대 네트워크 설계를 적용한 CNN 구조입니다. 그러나 합성곱은 여전히 모델의 핵심입니다. 높은 수준의 관점에서 볼 때, [합성곱](glossary#convolution)은 작은 행렬(*커널*)에 이미지 픽셀의 작은 윈도우를 곱하는 연산입니다. 이는 특정 텍스쳐(texture)이나 선의 곡률과 같은 일부 특징을 계산합니다. 그러고 다음 픽셀 윈도우로 넘어가는데, 여기서 합성곱이 이동하는 거리를 *보폭(stride)*이라고 합니다.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/convolution.gif"/>
-</div>
-
-<small>패딩이나 보폭이 없는 기본 합성곱, <a href="https://huggingface.co/papers/1603.07285">딥러닝을 위한 합성곱 연산 가이드</a></small>
-
-이 출력을 다른 합성곱 레이어에 전달할 수 있으며, 각 연속적인 레이어를 통해 네트워크는 핫도그나 로켓과 같이 더 복잡하고 추상적인 것을 학습합니다. 합성곱 레이어 사이에 풀링 레이어를 추가하여 차원을 줄이고 특징의 위치 변화에 대해 모델을 더 견고하게 만드는 것이 일반적입니다.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/convnext_architecture.png"/>
-</div>
-
-ConvNeXT는 CNN을 5가지 방식으로 현대화합니다:
-
-1. 각 단계의 블록 수를 변경하고 더 큰 보폭과 그에 대응하는 커널 크기로 이미지를 "패치화(patchify)"합니다. 겹치지 않는 슬라이딩 윈도우는 ViT가 이미지를 패치로 분할하는 방법과 유사하게 이 패치화 전략을 만듭니다.
-
-2. *병목(bottleneck)* 레이어는 채널 수를 줄였다가 다시 복원합니다. 왜냐하면 1x1 합성곱을 수행하는 것이 더 빠르고, 깊이를 늘릴 수 있기 때문입니다. 역 병목(inverted bottlenect)은 채널 수를 확장하고 축소함으로써 그 반대로 수행하므로, 메모리 효율이 더 높습니다.
-
-3. 병목 레이어의 일반적인 3x3 합성곱 레이어를 각 입력 채널에 개별적으로 합성곱을 적용한 다음 마지막에 쌓는 *깊이별 합성곱(depthwise convolution)*으로 대체합니다. 이는 네트워크 폭이 넓혀 성능이 향상됩니다.
-
-4. ViT는 어텐션 메커니즘 덕분에 한 번에 더 많은 이미지를 볼 수 있는 전역 수신 필드를 가지고 있습니다. ConvNeXT는 커널 크기를 7x7로 늘려 이 효과를 재현하려고 시도합니다.
-
-5. 또한 ConvNeXT는 Transformer 모델을 모방하는 몇 가지 레이어 설계를 변경합니다. 활성화 및 정규화 레이어가 더 적고, 활성화 함수가 ReLU 대신 GELU로 전환되고, BatchNorm 대신 LayerNorm을 사용합니다.
-
-합성곱 블록의 출력은 분류 헤드로 전달되며, 분류 헤드는 출력을 로짓으로 변환하고 교차 엔트로피 손실을 계산하여 가장 가능성이 높은 레이블을 찾습니다.
-
-### 객체 탐지[[object-detection]]
-
-[DETR](model_doc/detr), *DEtection TRansformer*는 CNN과 Transformer 인코더-디코더를 결합한 종단간(end-to-end) 객체 탐지 모델입니다.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/detr_architecture.png"/>
-</div>
-
-1. 사전훈련된 CNN *백본(backbone)*은 픽셀 값으로 나타낸 이미지를 가져와 저해상도 특징 맵을 만듭니다. 특징 맵에 대해 1x1 합성곱을 적용하여 차원을 줄이고, 고수준 이미지 표현을 가진 새로운 특징 맵을 생성합니다. Transformer는 시퀀스 모델이기 때문에 특징 맵을 위치 임베딩과 결합된 특징 벡터의 시퀀스로 평탄화합니다.
-
-2. 특징 벡터는 어텐션 레이어를 사용하여 이미지 표현을 학습하는 인코더에 전달됩니다. 다음으로, 인코더의 은닉 상태는 디코더에서 *객체 쿼리*와 결합됩니다. 객체 쿼리는 이미지의 다른 영역에 초점을 맞춘 학습된 임베딩으로 학습되고, 각 어텐션 레이어를 진행하면서 갱신됩니다. 디코더의 은닉 상태는 각 객체 쿼리에 대한 바운딩 박스 좌표와 클래스 레이블을 예측하는 순방향 네트워크에 전달되며, 객체가 없는 경우 `no object`가 출력됩니다.
-
-    DETR은 각 객체 쿼리를 병렬로 디코딩하여 *N* 개의 최종 예측을 출력합니다. 여기서 *N*은 쿼리 수입니다. 한 번에 하나의 요소를 예측하는 일반적인 자기회귀 모델과 달리, 객체 탐지는 한 번에 *N* 개의 예측을 수행하는 집합 예측 작업(`바운딩 박스`, `클래스 레이블`)입니다.
-
-3. DETR은 훈련 중 *이분 매칭 손실(bipartite matching loss)*을 사용하여 고정된 수의 예측과 고정된 실제 정답 레이블(ground truth labels) 세트를 비교합니다. *N*개의 레이블 세트에 실제 정답 레이블보다 적은 경우, `no object` 클래스로 패딩됩니다. 이 손실 함수는 DETR이 예측과 실제 정답 레이블 간 1:1 대응을 찾도록 권장합니다. 바운딩 박스 또는 클래스 레이블 중 하나라도 잘못된 경우, 손실이 발생합니다. 마찬가지로, 존재하지 않는 객체를 예측하는 경우, 패널티를 받습니다. 이로 인해 DETR은 이미지에서 눈에 잘 띄는 물체 하나에 집중하는 대신, 다른 객체를 찾도록 권장됩니다.
-
-객체 탐지 헤드가 DETR 상단에 추가되어 클래스 레이블과 바운딩 박스의 좌표를 찾습니다. 객체 탐지 헤드에는 두 가지 구성 요소가 있습니다: 디코더 은닉 상태를 클래스 레이블의 로짓으로 변환하는 선형 레이어 및 바운딩 박스를 예측하는 MLP
-
-객체 탐지에 직접 도전할 준비가 되셨나요? 완전한 [객체 탐지 가이드](tasks/object_detection)를 확인하여 DETR을 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-### 이미지 분할[[image-segmentation]]
-
-[Mask2Former](model_doc/mask2former)는 모든 유형의 이미지 분할 작업을 해결하는 범용 아키텍처입니다. 전통적인 분할 모델은 일반적으로 시멘틱(semantic) 또는 파놉틱(panoptic) 분할과 같은 이미지 분할의 특정 하위 작업에 맞춰 조정됩니다. Mask2Former는 모든 작업을 *마스크 분류* 문제로 구성합니다. 마스크 분류는 픽셀을 *N*개 세그먼트로 그룹화하고, 주어진 이미지에 대해 *N*개의 마스크와 그에 대응하는 클래스 레이블을 예측합니다. 이 섹션에서 Mask2Former의 작동 방법을 설명한 다음, 마지막에 SegFormer를 미세 조정해볼 수 있습니다.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/mask2former_architecture.png"/>
-</div>
-
-Mask2Former에는 3가지 주요 구성 요소가 있습니다:
-
-1. [Swin](model_doc/swin) 백본이 이미지를 받아 3개의 연속된 3x3 합성곱에서 저해상도 이미지 특징 맵을 생성합니다.
-
-2. 특징 맵은 *픽셀 디코더*에 전달됩니다. 이 디코더는 저해상도 특징을 고해상도 픽셀 임베딩으로 점진적으로 업샘플링합니다. 픽셀 디코더는 실제로 원본 이미지의 1/32, 1/16, 1/8 해상도의 다중 스케일 특징(저해상도 및 고해상도 특징 모두 포함)을 생성합니다.
-
-3. 이러한 서로 다른 크기의 특징 맵은 고해상도 특징에서 작은 객체를 포착하기 위해 한 번에 하나의 Transformer 디코더 레이어에 연속적으로 공급됩니다. Mask2Former의 핵심은 디코더의 *마스크 어텐션* 메커니즘입니다. 전체 이미지를 참조할 수 있는 크로스 어텐션(cross-attention)과 달리, 마스크 어텐션은 이미지의 특정 영역에만 집중합니다. 이는 이미지의 지역적 특징만으로 모델이 충분히 학습할 수 있기 때문에 더 빠르고 성능이 우수합니다.
-
-4. [DETR](tasks_explained#object-detection)과 같이, Mask2Former는 학습된 객체 쿼리를 사용하고 이를 픽셀 디코더에서의 이미지 특징과 결합하여 예측 집합(`클래스 레이블`, `마스크 예측`)을 생성합니다. 디코더의 은닉 상태는 선형 레이어로 전달되어 클래스 레이블에 대한 로짓으로 변환됩니다. 로짓과 클래스 레이블 사이의 교차 엔트로피 손실을 계산하여 가장 가능성이 높은 것을 찾습니다.
-
-    마스크 예측은 픽셀 임베딩과 최종 디코더 은닉 상태를 결합하여 생성됩니다. 시그모이드 교차 엔트로피 및 Dice 손실은 로짓과 실제 정답 마스크(ground truth mask) 사이에서 계산되어 가장 가능성이 높은 마스크를 찾습니다.
-
-이미지 분할에 직접 도전할 준비가 되셨나요? 완전한 [이미지 분할 가이드](tasks/semantic_segmentation)를 확인하여 SegFormer를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-### 깊이 추정[[depth-estimation]]
-
-[GLPN](model_doc/glpn), *Global-Local Path Network*는 [SegFormer](model_doc/segformer) 인코더와 경량 디코더를 결합한 깊이 추정을 위한 Transformer입니다.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/glpn_architecture.jpg"/>
-</div>
-
-1. ViT와 같이, 이미지는 패치 시퀀스로 분할되지만, 이미지 패치가 더 작다는 점이 다릅니다. 이는 세그멘테이션이나 깊이 추정과 같은 밀도 예측 작업에 더 적합합니다. 이미지 패치는 패치 임베딩으로 변환되어(패치 임베딩이 생성되는 방법은 [이미지 분류](#image-classification) 섹션을 참조하세요), 인코더로 전달됩니다.
-
-2. 인코더는 패치 임베딩을 받아, 여러 인코더 블록에 전달합니다. 각 블록은 어텐션 및 Mix-FFN 레이어로 구성됩니다. 후자의 목적은 위치 정보를 제공하는 것입니다. 각 인코더 블록의 끝에는 계층적 표현을 생성하기 위한 *패치 병합(patch merging)* 레이어가 있습니다. 각 인접한 패치 그룹의 특징은 연결되고, 연결된 특징에 선형 레이어가 적용되어 패치 수를 1/4의 해상도로 줄입니다. 이는 다음 인코더 블록의 입력이 되며, 이러한 전체 프로세스는 1/8, 1/16, 1/32 해상도의 이미지 특징을 가질 때까지 반복됩니다.
-
-3. 경량 디코더는 인코더에서 마지막 특징 맵(1/32 크기)을 가져와 1/16 크기로 업샘플링합니다. 여기서, 특징은 *선택적 특징 융합(SFF, Selective Feature Fusion)* 모듈로 전달됩니다. 이 모듈은 각 특징에 대해 어텐션 맵에서 로컬 및 전역 특징을 선택하고 결합한 다음, 1/8로 업샘플링합니다. 이 프로세스는 디코딩된 특성이 원본 이미지와 동일한 크기가 될 때까지 반복됩니다. 출력은 두 개의 합성곱 레이어를 거친 다음, 시그모이드 활성화가 적용되어 각 픽셀의 깊이를 예측합니다.
-
-## 자연어처리[[natural-language-processing]]
-
-Transformer는 초기에 기계 번역을 위해 설계되었고, 그 이후로는 사실상 모든 NLP 작업을 해결하기 위한 기본 아키텍처가 되었습니다. 어떤 작업은 Transformer의 인코더 구조에 적합하며, 다른 작업은 디코더에 더 적합합니다. 또 다른 작업은 Transformer의 인코더-디코더 구조를 모두 활용합니다.
-
-### 텍스트 분류[[text-classification]]
-
-[BERT](model_doc/bert)는 인코더 전용 모델이며, 텍스트의 풍부한 표현을 학습하기 위해 양방향의 단어에 주목함으로써 심층 양방향성(deep bidirectionality)을 효과적으로 구현한 최초의 모델입니다.
-
-1. BERT는 [WordPiece](tokenizer_summary#wordpiece) 토큰화를 사용하여 문장의 토큰 임베딩을 생성합니다. 단일 문장과 한 쌍의 문장을 구분하기 위해 특수한 `[SEP]` 토큰이 추가됩니다. 모든 텍스트 시퀀스의 시작 부분에는 특수한 `[CLS]` 토큰이 추가됩니다. `[CLS]` 토큰이 있는 최종 출력은 분류 작업을 위한 분류 헤드로 입력에 사용됩니다. BERT는 또한 한 쌍의 문장에서 각 토큰이 첫 번째 문장인지 두 번째 문장에 속하는지 나타내는 세그먼트 임베딩(segment embedding)을 추가합니다.
-
-2. BERT는 마스크드 언어 모델링과 다음 문장 예측, 두 가지 목적으로 사전훈련됩니다. 마스크드 언어 모델링에서는 입력 토큰의 일부가 무작위로 마스킹되고, 모델은 이를 예측해야 합니다. 이는 모델이 모든 단어를 보고 다음 단어를 "예측"할 수 있는 양방향성 문제를 해결합니다. 예측된 마스크 토큰의 최종 은닉 상태는 어휘에 대한 소프트맥스가 있는 순방향 네트워크로 전달되어 마스크된 단어를 예측합니다.
-
-    두 번째 사전훈련 대상은 다음 문장 예측입니다. 모델은 문장 B가 문장 A 다음에 오는지 예측해야 합니다. 문장 B가 다음 문장인 경우와 무작위 문장인 경우 각각 50%의 확률로 발생합니다. 다음 문장인지 아닌지에 대한 예측은 두 개의 클래스(`IsNext` 및 `NotNext`)에 대한 소프트맥스가 있는 순방향 네트워크로 전달됩니다.
-
-3. 입력 임베딩은 여러 인코더 레이어를 거쳐서 최종 은닉 상태를 출력합니다.
-
-사전훈련된 모델을 텍스트 분류에 사용하려면, 기본 BERT 모델 상단에 시퀀스 분류 헤드를 추가합니다. 시퀀스 분류 헤드는 최종 은닉 상태를 받는 선형 레이어이며, 로짓으로 변환하기 위해 선형 변환을 수행합니다. 교차 엔트로피 손실은 로짓과 타겟 간에 계산되어 가장 가능성이 높은 레이블을 찾습니다. 
-
-텍스트 분류에 직접 도전할 준비가 되셨나요? 완전한 [텍스트 분류 가이드](tasks/sequence_classification)를 확인하여 DistilBERT를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-### 토큰 분류[[token-classification]]
-
-개체명 인식(Named Entity Recognition, NER)과 같은 토큰 분류 작업에 BERT를 사용하려면, 기본 BERT 모델 상단에 토큰 분류 헤드를 추가합니다. 토큰 분류 헤드는 최종 은닉 상태를 받는 선형 레이어이며, 로짓으로 변환하기 위해 선형 변환을 수행합니다. 교차 엔트로피 손실은 로짓과 각 토큰 간에 계산되어 가장 가능성이 높은 레이블을 찾습니다. 
-
-토큰 분류에 직접 도전할 준비가 되셨나요? 완전한 [토큰 분류 가이드](tasks/token_classification)를 확인하여 DistilBERT를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-### 질의응답[[question-answering]]
-
-질의응답에 BERT를 사용하려면, 기본 BERT 모델 위에 스팬(span) 분류 헤드를 추가합니다. 이 선형 레이어는 최종 은닉 상태를 받고, 답변에 대응하는 `스팬`의 시작과 끝 로그를 계산하기 위해 선형 변환을 수행합니다. 교차 엔트로피 손실은 로짓과 각 레이블 위치 간에 계산되어 답변에 대응하는 가장 가능성이 높은 텍스트의 스팬을 찾습니다. 
-
-질의응답에 직접 도전할 준비가 되셨나요? 완전한 [질의응답 가이드](tasks/question_answering)를 확인하여 DistilBERT를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-<Tip>
-
-💡 사전훈련된 BERT를 다양한 작업에 사용하는 것이 얼마나 쉬운지 주목하세요. 사전훈련된 모델에 특정 헤드를 추가하기만 하면 은닉 상태를 원하는 출력으로 조작할 수 있습니다!
-
-</Tip>
-
-### 텍스트 생성[[text-generation]]
-
-[GPT-2](model_doc/gpt2)는 대량의 텍스트에 대해 사전훈련된 디코딩 전용 모델입니다. 프롬프트를 주어지면 설득력 있는 (항상 사실은 아니지만!) 텍스트를 생성하고 명시적으로 훈련되지 않았음에도 불구하고 질의응답과 같은 다른 NLP 작업을 완수할 수 있습니다.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/gpt2_architecture.png"/>
-</div>
-
-1. GPT-2는 단어를 토큰화하고 토큰 임베딩을 생성하기 위해 [바이트 페어 인코딩(BPE, byte pair encoding)](tokenizer_summary#bytepair-encoding-bpe)을 사용합니다. 위치 인코딩은 시퀀스에서 각 토큰의 위치를 나타내기 위해 토큰 임베딩에 추가됩니다. 입력 임베딩은 여러 디코더 블록을 거쳐 일부 최종 은닉 상태를 출력합니다. 각 디코더 블록 내에서 GPT-2는 *마스크드 셀프 어텐션(masked self-attention)* 레이어를 사용합니다. 이는 GPT-2가 이후 토큰(future tokens)에 주의를 기울일 수 없도록 합니다. 왼쪽에 있는 토큰에만 주의를 기울일 수 있습니다. 마스크드 셀프 어텐션에서는 어텐션 마스크를 사용하여 이후 토큰에 대한 점수(score)를 `0`으로 설정하기 때문에 BERT의 [`mask`] 토큰과 다릅니다.
-
-2. 디코더의 출력은 언어 모델링 헤드에 전달되며, 언어 모델링 헤드는 은닉 상태를 로짓으로 선형 변환을 수행합니다. 레이블은 시퀀스의 다음 토큰으로, 로짓을 오른쪽으로 하나씩 이동하여 생성됩니다. 교차 엔트로피 손실은 이동된 로짓과 레이블 간에 계산되어 가장 가능성이 높은 다음 토큰을 출력합니다.
-
-GPT-2의 사전훈련 목적은 전적으로 [인과적 언어 모델링](glossary#causal-language-modeling)에 기반하여, 시퀀스에서 다음 단어를 예측하는 것입니다. 이는 GPT-2가 텍스트 생성에 관련된 작업에 특히 우수하도록 합니다.
-
-텍스트 생성에 직접 도전할 준비가 되셨나요? 완전한 [인과적 언어 모델링 가이드](tasks/language_modeling#causal-language-modeling)를 확인하여 DistilGPT-2를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-<Tip>
-
-텍스트 생성에 대한 자세한 내용은 [텍스트 생성 전략](generation_strategies) 가이드를 확인하세요!
-
-</Tip>
-
-### 요약[[summarization]]
-
-[BART](model_doc/bart) 및 [T5](model_doc/t5)와 같은 인코더-디코더 모델은 요약 작업의 시퀀스-투-시퀀스 패턴을 위해 설계되었습니다. 이 섹션에서 BART의 작동 방법을 설명한 다음, 마지막에 T5를 미세 조정해볼 수 있습니다. 
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bart_architecture.png"/>
-</div>
-
-1. BART의 인코더 아키텍처는 BERT와 매우 유사하며 텍스트의 토큰 및 위치 임베딩을 받습니다. BART는 입력을 변형시키고 디코더로 재구성하여 사전훈련됩니다. 특정 변형 기법이 있는 다른 인코더와는 달리, BART는 모든 유형의 변형을 적용할 수 있습니다. 그러나 *text infilling* 변형 기법이 가장 잘 작동합니다. Text Infiling에서는 여러 텍스트 스팬을 **단일** [`mask`] 토큰으로 대체합니다. 이는 모델이 마스크된 토큰을 예측해야 하고, 모델에 누락된 토큰의 수를 예측하도록 가르치기 때문에 중요합니다. 입력 임베딩과 마스크된 스팬이 인코더를 거쳐 최종 은닉 상태를 출력하지만, BERT와 달리 BART는 마지막에 단어를 예측하는 순방향 네트워크를 추가하지 않습니다.
-
-2. 인코더의 출력은 디코더로 전달되며, 디코더는 인코더의 출력에서 마스크 토큰과 변형되지 않은 토큰을 예측해야 합니다. 이는 디코더가 원본 텍스트를 복원하는 데 도움이 되는 추가적인 문맥을 얻도록 합니다. 디코더의 출력은 언어 모델링 헤드에 전달되며, 언어 모델링 헤드는 은닉 상태를 로짓으로 선형 변환을 수행합니다. 교차 엔트로피 손실은 로짓과 토큰이 오른쪽으로 이동된 레이블 간에 계산됩니다.
-
-요약에 직접 도전할 준비가 되셨나요? 완전한 [요약 가이드](tasks/summarization)를 확인하여 T5를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-<Tip>
-
-텍스트 생성에 대한 자세한 내용은 [텍스트 생성 전략](generation_strategies) 가이드를 확인하세요!
-
-</Tip>
-
-### 번역[[translation]]
-
-번역은 시퀀스-투-시퀀스 작업의 또 다른 예로, [BART](model_doc/bart) 또는 [T5](model_doc/t5)와 같은 인코더-디코더 모델을 사용할 수 있습니다. 이 섹션에서 BART의 작동 방법을 설명한 다음, 마지막에 T5를 미세 조정해볼 수 있습니다. 
-
-BART는 원천 언어를 타겟 언어로 디코딩할 수 있는 입력에 매핑하기 위해 무작위로 초기화된 별도의 인코더를 추가하여 번역에 적용합니다. 이 새로운 인코더의 임베딩은 원본 단어 임베딩 대신 사전훈련된 인코더로 전달됩니다. 원천 인코더는 모델 출력의 교차 엔트로피 손실로부터 원천 인코더, 위치 임베딩, 입력 임베딩을 갱신하여 훈련됩니다. 첫 번째 단계에서는 모델 파라미터가 고정되고, 두 번째 단계에서는 모든 모델 파라미터가 함께 훈련됩니다.
-
-BART는 이후 번역을 위해 다양한 언어로 사전훈련된 다국어 버전의 mBART로 확장되었습니다.
-
-번역에 직접 도전할 준비가 되셨나요? 완전한 [번역 가이드](tasks/summarization)를 확인하여 T5를 미세 조정하고 추론에 사용하는 방법을 학습하세요!
-
-<Tip>
-
-텍스트 생성에 대한 자세한 내용은 [텍스트 생성 전략](generation_strategies) 가이드를 확인하세요!
-
-</Tip>
--- a/docs/source/ko/tf_xla.md
+++ b/docs/source/ko/tf_xla.md
@ -1,174 +0,0 @@
-<!--Copyright 2023 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# TensorFlow 모델을 위한 XLA 통합 [[xla-integration-for-tensorflow-models]]
-
-[[open-in-colab]]
-
-XLA(Accelerated Linear Algebra)는 TensorFlow 모델의 실행 시간을 가속화하기 위한 컴파일러입니다. [공식 문서](https://www.tensorflow.org/xla)에 따르면 다음과 같습니다:
-
-XLA(Accelerated Linear Algebra)는 선형 대수를 위한 도메인 특화 컴파일러로, TensorFlow 모델을 소스 코드 변경 없이 가속화할 수 있습니다.
-
-TensorFlow에서 XLA를 사용하는 것은 간단합니다. XLA는 `tensorflow` 라이브러리 내에 패키지로 제공되며, [`tf.function`](https://www.tensorflow.org/guide/intro_to_graphs)과 같은 그래프 생성 함수에서 `jit_compile` 인수를 사용하여 활성화할 수 있습니다. `fit()` 및 `predict()`와 같은 Keras 메소드를 사용하는 경우, `jit_compile` 인수를 `model.compile()`에 전달하여 XLA를 간단하게 활성화할 수 있습니다. 그러나 XLA는 이러한 메소드에 국한되지 않고 임의의 `tf.function`을 가속화하는 데에도 사용할 수 있습니다.
-
-🤗 Transformers에서는 [GPT2](https://huggingface.co/docs/transformers/model_doc/gpt2), [T5](https://huggingface.co/docs/transformers/model_doc/t5), [OPT](https://huggingface.co/docs/transformers/model_doc/opt)와 같은 모델의 텍스트 생성, 그리고 [Whisper](https://huggingface.co/docs/transformers/model_doc/whisper)와 같은 모델의 음성 처리를 포함하여 여러 TensorFlow 메소드가 XLA와 호환되도록 다시 작성되었습니다.
-
-정확한 속도 향상은 모델에 따라 다르지만, 🤗 Transformers 내의 TensorFlow 텍스트 생성 모델의 경우 최대 100배의 속도 향상을 확인했습니다. 이 문서에서는 이러한 모델에 대해 XLA를 사용하여 최대 성능을 얻는 방법을 설명합니다. 또한 XLA 통합의 벤치마크 및 디자인 철학에 대한 추가 자료 링크도 제공할 것입니다.
-
-## XLA를 사용하여 TF 함수 실행하기 [[running-tf-functions-with-xla]]
-
-TensorFlow에서 다음과 같은 모델을 고려해 봅시다:
-
-```py
-import tensorflow as tf
-
-model = tf.keras.Sequential(
-    [tf.keras.layers.Dense(10, input_shape=(10,), activation="relu"), tf.keras.layers.Dense(5, activation="softmax")]
-)
-```
-
-위 모델은 차원이 `(10, )`인 입력을 받습니다. 다음과 같이 모델을 사용하여 순전파를 실행할 수 있습니다:
-
-```py
-# 모델에 대한 임의의 입력을 생성합니다.
-batch_size = 16
-input_vector_dim = 10
-random_inputs = tf.random.normal((batch_size, input_vector_dim))
-
-# 순전파를 실행합니다.
-_ = model(random_inputs)
-```
-
-XLA로 컴파일된 함수로 순전파를 실행하려면 다음과 같이 해야 합니다:
-
-```py
-xla_fn = tf.function(model, jit_compile=True)
-_ = xla_fn(random_inputs)
-```
-
-`model`의 기본 `call()` 함수는 XLA 그래프를 컴파일하는 데 사용됩니다. 그러나 다른 모델 함수를 XLA로 컴파일하려면 다음과 같이 할 수도 있습니다:
-
-```py
-my_xla_fn = tf.function(model.my_xla_fn, jit_compile=True)
-```
-
-## 🤗 Transformers에서 XLA를 사용하여 TF 텍스트 생성 모델 실행하기 [[running-a-tf-text-generation-model-with-xla-from-transformers]]
-
-🤗 Transformers에서 XLA로 가속화된 생성을 활성화하려면 최신 버전의 `transformers`가 설치되어 있어야 합니다. 다음과 같이 설치할 수 있습니다:
-
-```bash
-pip install transformers --upgrade
-```
-
-그리고 다음 코드를 실행할 수 있습니다:
-
-```py
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForCausalLM
-
-# 최소 버전의 Transformers가 설치되어 있지 않다면 오류가 발생합니다.
-from transformers.utils import check_min_version
-
-check_min_version("4.21.0")
-
-
-tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", padding_side="left", pad_token="</s>")
-model = TFAutoModelForCausalLM.from_pretrained("openai-community/gpt2")
-input_string = ["TensorFlow is"]
-
-# XLA 생성 함수를 만들기 위한 한 줄
-xla_generate = tf.function(model.generate, jit_compile=True)
-
-tokenized_input = tokenizer(input_string, return_tensors="tf")
-generated_tokens = xla_generate(**tokenized_input, num_beams=2)
-
-decoded_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
-print(f"Generated -- {decoded_text}")
-# Generated -- TensorFlow is an open-source, open-source, distributed-source application # framework for the
-```
-
-알 수 있듯이, `generate()`에서 XLA를 활성화하는 것은 단 한 줄의 코드입니다. 코드의 나머지 부분은 변경되지 않습니다. 그러나 위 코드 스니펫에서는 XLA에 특정한 몇 가지 주의할 점이 있습니다. XLA가 가져다줄 속도 향상을 실현하기 위해서는 이를 알고 있어야 합니다. 다음 섹션에서 이에 대해 논의합니다.
-
-## 주의할 점 [[gotchas-to-be-aware-of]]
-
-XLA 활성화 함수(`xla_generate()`와 같은)를 처음 실행할 때 내부적으로 계산 그래프를 추론하려고 하며, 이는 시간이 소요됩니다. 이 과정은 [“추적(tracing)”](https://www.tensorflow.org/guide/intro_to_graphs#when_is_a_function_tracing)이라고 알려져 있습니다.
-
-생성 시간이 빠르지 않다는 것을 알 수 있을 것입니다. `xla_generate()`(또는 다른 XLA 활성화 함수)의 연속 호출은 함수에 전달된 입력이 초기에 구축된 계산 그래프와 동일한 형태를 따른다면, 계산 그래프를 추론할 필요가 없습니다. 이는 입력 형태가 고정된 모달리티(예: 이미지)에는 문제가 되지 않지만, 가변 입력 형태 모달리티(예: 텍스트)를 사용할 때 주의해야 합니다.
-
-`xla_generate()`가 항상 동일한 입력 형태로 동작하도록 하려면, 토크나이저를 호출할 때 `padding` 인수를 지정할 수 있습니다.
-
-```py
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForCausalLM
-
-tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", padding_side="left", pad_token="</s>")
-model = TFAutoModelForCausalLM.from_pretrained("openai-community/gpt2")
-input_string = ["TensorFlow is"]
-
-xla_generate = tf.function(model.generate, jit_compile=True)
-
-# 여기서, padding 옵션이 있는 토크나이저를 호출합니다.
-tokenized_input = tokenizer(input_string, pad_to_multiple_of=8, padding=True, return_tensors="tf")
-
-generated_tokens = xla_generate(**tokenized_input, num_beams=2)
-decoded_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
-print(f"Generated -- {decoded_text}")
-```
-
-이렇게 하면 `xla_generate()`에 대한 입력이 항상 추적된 형태로 전달되어 생성 시간이 가속화됩니다. 다음 코드로 이를 확인할 수 있습니다:
-
-```py
-import time
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForCausalLM
-
-tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", padding_side="left", pad_token="</s>")
-model = TFAutoModelForCausalLM.from_pretrained("openai-community/gpt2")
-
-xla_generate = tf.function(model.generate, jit_compile=True)
-
-for input_string in ["TensorFlow is", "TensorFlow is a", "TFLite is a"]:
-    tokenized_input = tokenizer(input_string, pad_to_multiple_of=8, padding=True, return_tensors="tf")
-    start = time.time_ns()
-    generated_tokens = xla_generate(**tokenized_input, num_beams=2)
-    end = time.time_ns()
-    print(f"Execution time -- {(end - start) / 1e6:.1f} ms\n")
-```
-
-Tesla T4 GPU에서는 다음과 같은 출력을 예상할 수 있습니다:
-
-```bash
-Execution time -- 30819.6 ms
-
-Execution time -- 79.0 ms
-
-Execution time -- 78.9 ms
-```
-`xla_generate()`의 첫 번째 호출은 추적 때문에 시간이 오래 걸리지만, 연속 호출은 몇 배나 빠릅니다. 생성 옵션에 대한 어떤 변경이든 다시 추적을 유발하므로 생성 시간이 느려질 수 있음을 명심하세요.
-
-이 문서에서는 🤗 Transformers에서 제공하는 모든 텍스트 생성 옵션을 다루지 않았습니다. 고급 사용 사례에 대해 문서를 참조하시기 바랍니다.
-
-## 추가 자료 [[additional-resources]]
-
-여기에 🤗 Transformers와 XLA에 대해 더 자세히 알고 싶은 경우 도움이 될 수 있는 몇 가지 추가 자료를 제공합니다. 
- 
-* [이 Colab 노트북](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/91_tf_xla_generate.ipynb)은 XLA와 호환되는 인코더-디코더([T5](https://huggingface.co/docs/transformers/model_doc/t5)와 같은) 및 디코더 전용([GPT2](https://huggingface.co/docs/transformers/model_doc/gpt2)와 같은) 텍스트 생성 모델을 실험해 볼 수 있는 대화형 데모를 제공합니다.
-* [이 블로그 글](https://huggingface.co/blog/tf-xla-generate)은 TensorFlow에서 XLA에 대한 친절한 소개와 함께 XLA와 호환되는 모델의 비교 벤치마크에 대한 개요를 제공합니다.
-* [이 블로그 글](https://blog.tensorflow.org/2022/11/how-hugging-face-improved-text-generation-performance-with-xla.html)은 🤗 Transformers의 TensorFlow 모델에 XLA 지원을 추가하는 것에 대한 디자인 철학을 논의합니다.
-* XLA와 TensorFlow 그래프에 대해 더 자세히 알고 싶은 경우 추천하는 글:
-    * [XLA: 기계 학습을 위한 최적화 컴파일러](https://www.tensorflow.org/xla)
-    * [그래프 및 tf.function 소개](https://www.tensorflow.org/guide/intro_to_graphs)
-    * [tf.function으로 성능 향상하기](https://www.tensorflow.org/guide/function) 
--- a/docs/source/pt/custom_models.md
+++ b/docs/source/pt/custom_models.md
@ -284,7 +284,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
 Agora para enviar o modelo para o Hub, certifique-se de estar logado. Ou execute no seu terminal:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 ou a partir do notebook:
--- a/docs/source/pt/run_scripts.md
+++ b/docs/source/pt/run_scripts.md
@ -327,7 +327,7 @@ python examples/pytorch/summarization/run_summarization.py
 Todos os scripts podem enviar seu modelo final para o [Model Hub](https://huggingface.co/models). Certifique-se de estar conectado ao Hugging Face antes de começar:

 ```bash
-huggingface-cli login
+hf auth login
 ```

 Em seguida, adicione o argumento `push_to_hub` ao script. Este argumento criará um repositório com seu nome de usuário do Hugging Face e o nome da pasta especificado em `output_dir`.
--- a/docs/source/zh/custom_models.md
+++ b/docs/source/zh/custom_models.md
@ -246,7 +246,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
 现在要将模型推送到集线器，请确保你已登录。你看可以在终端中运行以下命令：

 ```bash
-huggingface-cli login
+hf auth login
 ```

 或者在笔记本中运行以下代码：
--- a/docs/source/zh/model_sharing.md
+++ b/docs/source/zh/model_sharing.md
@ -56,7 +56,7 @@ Model Hub的内置版本控制基于git和[git-lfs](https://git-lfs.github.com/)


 ```bash
-huggingface-cli login
+hf auth login
 ```

 如果您正在使用像Jupyter或Colaboratory这样的`notebook`，请确保您已安装了[`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library)库。该库允许您以编程方式与Hub进行交互。
--- a/docs/source/zh/run_scripts.md
+++ b/docs/source/zh/run_scripts.md
@ -331,7 +331,7 @@ python examples/pytorch/summarization/run_summarization.py
 所有脚本都可以将您的最终模型上传到[Model Hub](https://huggingface.co/models)。在开始之前，请确保您已登录Hugging Face：

 ```bash
-huggingface-cli login
+hf auth login
 ```

 然后，在脚本中添加`push_to_hub`参数。这个参数会创建一个带有您Hugging Face用户名和`output_dir`中指定的文件夹名称的仓库。
--- a/examples/flax/README.md
+++ b/examples/flax/README.md
@ -79,5 +79,5 @@ To specify a given repository name, use the `--hub_model_id` argument. You will

 A few notes on this integration:

- you will need to be logged in to the Hugging Face website locally for it to work, the easiest way to achieve this is to run `huggingface-cli login` and then type your username and password when prompted. You can also pass along your authentication token with the `--hub_token` argument.
+- you will need to be logged in to the Hugging Face website locally for it to work, the easiest way to achieve this is to run `hf auth login` and then type your username and password when prompted. You can also pass along your authentication token with the `--hub_token` argument.
 - the `output_dir` you pick will either need to be a new folder or a local clone of the distant repository you are using.
--- a/examples/flax/image-captioning/run_image_captioning_flax.py
+++ b/examples/flax/image-captioning/run_image_captioning_flax.py
@ -186,7 +186,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -906,7 +906,7 @@ def main():
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/language-modeling/run_bart_dlm_flax.py
+++ b/examples/flax/language-modeling/run_bart_dlm_flax.py
@ -172,7 +172,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -530,7 +530,7 @@ def main():
            trust_remote_code=data_args.trust_remote_code,
        )

-        if "validation" not in datasets.keys():
+        if "validation" not in datasets:
            datasets["validation"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
@ -567,7 +567,7 @@ def main():
            num_proc=data_args.preprocessing_num_workers,
        )

-        if "validation" not in datasets.keys():
+        if "validation" not in datasets:
            datasets["validation"] = load_dataset(
                extension,
                data_files=data_files,
@ -671,7 +671,7 @@ def main():
    # max_seq_length.
    def group_texts(examples):
        # Concatenate all texts.
-        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
+        concatenated_examples = {k: list(chain(*examples[k])) for k in examples}
        total_length = len(concatenated_examples[list(examples.keys())[0]])
        # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
        # customize this part to your needs.
@ -777,7 +777,7 @@ def main():
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/language-modeling/run_clm_flax.py
+++ b/examples/flax/language-modeling/run_clm_flax.py
@ -173,7 +173,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -407,7 +407,7 @@ def main():
            trust_remote_code=model_args.trust_remote_code,
        )

-        if "validation" not in dataset.keys():
+        if "validation" not in dataset:
            dataset["validation"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
@ -447,7 +447,7 @@ def main():
            num_proc=data_args.preprocessing_num_workers,
        )

-        if "validation" not in dataset.keys():
+        if "validation" not in dataset:
            dataset["validation"] = load_dataset(
                extension,
                data_files=data_files,
@ -580,7 +580,7 @@ def main():
    # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
    def group_texts(examples):
        # Concatenate all texts.
-        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
+        concatenated_examples = {k: list(chain(*examples[k])) for k in examples}
        total_length = len(concatenated_examples[list(examples.keys())[0]])
        # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
        # customize this part to your needs.
@ -674,7 +674,7 @@ def main():
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/language-modeling/run_mlm_flax.py
+++ b/examples/flax/language-modeling/run_mlm_flax.py
@ -179,7 +179,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -448,7 +448,7 @@ def main():
            trust_remote_code=model_args.trust_remote_code,
        )

-        if "validation" not in datasets.keys():
+        if "validation" not in datasets:
            datasets["validation"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
@ -485,7 +485,7 @@ def main():
            num_proc=data_args.preprocessing_num_workers,
        )

-        if "validation" not in datasets.keys():
+        if "validation" not in datasets:
            datasets["validation"] = load_dataset(
                extension,
                data_files=data_files,
@ -603,7 +603,7 @@ def main():
        # max_seq_length.
        def group_texts(examples):
            # Concatenate all texts.
-            concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
+            concatenated_examples = {k: list(chain(*examples[k])) for k in examples}
            total_length = len(concatenated_examples[list(examples.keys())[0]])
            # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
            # customize this part to your needs.
@ -707,7 +707,7 @@ def main():
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/language-modeling/run_t5_mlm_flax.py
+++ b/examples/flax/language-modeling/run_t5_mlm_flax.py
@ -173,7 +173,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -572,7 +572,7 @@ def main():
            trust_remote_code=data_args.trust_remote_code,
        )

-        if "validation" not in datasets.keys():
+        if "validation" not in datasets:
            datasets["validation"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
@ -609,7 +609,7 @@ def main():
            num_proc=data_args.preprocessing_num_workers,
        )

-        if "validation" not in datasets.keys():
+        if "validation" not in datasets:
            datasets["validation"] = load_dataset(
                extension,
                data_files=data_files,
@ -703,7 +703,7 @@ def main():
    # Main data processing function that will concatenate all texts from our dataset and generate chunks of expanded_inputs_length.
    def group_texts(examples):
        # Concatenate all texts.
-        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
+        concatenated_examples = {k: list(chain(*examples[k])) for k in examples}
        total_length = len(concatenated_examples[list(examples.keys())[0]])
        # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
        # customize this part to your needs.
@ -814,7 +814,7 @@ def main():
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/question-answering/run_qa.py
+++ b/examples/flax/question-answering/run_qa.py
@ -60,7 +60,7 @@ from transformers.utils import check_min_version, send_example_telemetry
 logger = logging.getLogger(__name__)

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 Array = Any
 Dataset = datasets.arrow_dataset.Dataset
@ -159,7 +159,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -345,7 +345,7 @@ def create_train_state(
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py
+++ b/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py
@ -59,7 +59,7 @@ from transformers.utils.versions import require_version


 # Will error if the minimal version of Transformers is not installed. Remove at your own risk.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 require_version("datasets>=2.14.0", "To fix: pip install -r examples/flax/speech-recognition/requirements.txt")

@ -668,7 +668,7 @@ def main():
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/summarization/run_summarization_flax.py
+++ b/examples/flax/summarization/run_summarization_flax.py
@ -192,7 +192,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -768,7 +768,7 @@ def main():
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/text-classification/run_flax_glue.py
+++ b/examples/flax/text-classification/run_flax_glue.py
@ -55,7 +55,7 @@ from transformers.utils import check_min_version, send_example_telemetry

 logger = logging.getLogger(__name__)
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 Array = Any
 Dataset = datasets.arrow_dataset.Dataset
@ -107,7 +107,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -249,7 +249,7 @@ def create_train_state(
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/token-classification/run_flax_ner.py
+++ b/examples/flax/token-classification/run_flax_ner.py
@ -56,7 +56,7 @@ from transformers.utils.versions import require_version

 logger = logging.getLogger(__name__)
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

@ -155,7 +155,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -310,7 +310,7 @@ def create_train_state(
        layer_norm_named_params = {
            layer[-2:]
            for layer_norm_name in layer_norm_candidates
-            for layer in flat_params.keys()
+            for layer in flat_params
            if layer_norm_name in "".join(layer).lower()
        }
        flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
--- a/examples/flax/vision/run_image_classification.py
+++ b/examples/flax/vision/run_image_classification.py
@ -163,7 +163,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
--- a/examples/legacy/pytorch-lightning/lightning_base.py
+++ b/examples/legacy/pytorch-lightning/lightning_base.py
@ -379,8 +379,8 @@ def generic_train(
        train_params["distributed_backend"] = "ddp"

    train_params["accumulate_grad_batches"] = args.accumulate_grad_batches
-    train_params["accelerator"] = extra_train_kwargs.get("accelerator", None)
-    train_params["profiler"] = extra_train_kwargs.get("profiler", None)
+    train_params["accelerator"] = extra_train_kwargs.get("accelerator")
+    train_params["profiler"] = extra_train_kwargs.get("profiler")

    trainer = pl.Trainer.from_argparse_args(
        args,
--- a/examples/legacy/seq2seq/download_wmt.py
+++ b/examples/legacy/seq2seq/download_wmt.py
@ -44,7 +44,7 @@ def download_wmt_dataset(src_lang="ro", tgt_lang="en", dataset="wmt16", save_dir
    save_dir = Path(save_dir)
    save_dir.mkdir(exist_ok=True)

-    for split in ds.keys():
+    for split in ds:
        print(f"Splitting {split} with {ds[split].num_rows} records")

        # to save to val.source, val.target like summary datasets
--- a/examples/modular-transformers/modeling_test_detr.py
+++ b/examples/modular-transformers/modeling_test_detr.py
@ -202,7 +202,7 @@ def replace_batch_norm(model):
        if isinstance(module, nn.BatchNorm2d):
            new_module = TestDetrFrozenBatchNorm2d(module.num_features)

-            if not module.weight.device == torch.device("meta"):
+            if module.weight.device != torch.device("meta"):
                new_module.weight.data.copy_(module.weight)
                new_module.bias.data.copy_(module.bias)
                new_module.running_mean.data.copy_(module.running_mean)
--- a/examples/pytorch/README.md
+++ b/examples/pytorch/README.md
@ -90,7 +90,7 @@ To specify a given repository name, use the `--hub_model_id` argument. You will

 A few notes on this integration:

- you will need to be logged in to the Hugging Face website locally for it to work, the easiest way to achieve this is to run `huggingface-cli login` and then type your username and password when prompted. You can also pass along your authentication token with the `--hub_token` argument.
+- you will need to be logged in to the Hugging Face website locally for it to work, the easiest way to achieve this is to run `hf auth login` and then type your username and password when prompted. You can also pass along your authentication token with the `--hub_token` argument.
 - the `output_dir` you pick will either need to be a new folder or a local clone of the distant repository you are using.

 ## Distributed training and mixed precision
--- a/examples/pytorch/audio-classification/README.md
+++ b/examples/pytorch/audio-classification/README.md
@ -115,10 +115,10 @@ On 4 V100 GPUs (16GB), this script should run in ~1 hour and yield accuracy of *
 $ apt install git-lfs
 ```

-2. Log in with your HuggingFace account credentials using `huggingface-cli`
+2. Log in with your HuggingFace account credentials using `hf`

 ```bash
-$ huggingface-cli login
+$ hf auth login
 # ...follow the prompts
 ```

--- a/examples/pytorch/audio-classification/run_audio_classification.py
+++ b/examples/pytorch/audio-classification/run_audio_classification.py
@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")

@ -167,7 +167,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
--- a/examples/pytorch/contrastive-image-text/run_clip.py
+++ b/examples/pytorch/contrastive-image-text/run_clip.py
@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")

@ -100,7 +100,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
--- a/examples/pytorch/image-classification/README.md
+++ b/examples/pytorch/image-classification/README.md
@ -129,7 +129,7 @@ dataset = load_dataset("imagefolder", data_files={"train": ["path/to/file1", "pa
 Next, push it to the hub!

 ```python
-# assuming you have ran the huggingface-cli login command in a terminal
+# assuming you have ran the hf auth login command in a terminal
 dataset.push_to_hub("name_of_your_dataset")

 # if you want to push to a private repo, simply pass private=True:
@ -152,10 +152,10 @@ $ git config --global user.email "you@example.com"
 $ git config --global user.name "Your Name"
 ```

-2. Log in with your HuggingFace account credentials using `huggingface-cli`:
+2. Log in with your HuggingFace account credentials using `hf`:

 ```bash
-$ huggingface-cli login
+$ hf auth login
 # ...follow the prompts
 ```

--- a/examples/pytorch/image-classification/run_image_classification.py
+++ b/examples/pytorch/image-classification/run_image_classification.py
@ -68,7 +68,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

@ -168,7 +168,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -288,7 +288,7 @@ def main():
        return {"pixel_values": pixel_values, "labels": labels}

    # If we don't have a validation split, split off a percentage of train as validation.
-    data_args.train_val_split = None if "validation" in dataset.keys() else data_args.train_val_split
+    data_args.train_val_split = None if "validation" in dataset else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = dataset["train"].train_test_split(data_args.train_val_split)
        dataset["train"] = split["train"]
--- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py
+++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py
@ -61,7 +61,7 @@ from transformers.utils.versions import require_version


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 logger = get_logger(__name__)

@ -324,7 +324,7 @@ def main():
        )

    # If we don't have a validation split, split off a percentage of train as validation.
-    args.train_val_split = None if "validation" in dataset.keys() else args.train_val_split
+    args.train_val_split = None if "validation" in dataset else args.train_val_split
    if isinstance(args.train_val_split, float) and args.train_val_split > 0.0:
        split = dataset["train"].train_test_split(args.train_val_split)
        dataset["train"] = split["train"]
--- a/examples/pytorch/image-pretraining/README.md
+++ b/examples/pytorch/image-pretraining/README.md
@ -239,10 +239,10 @@ $ git config --global user.email "you@example.com"
 $ git config --global user.name "Your Name"
 ```

-2. Log in with your HuggingFace account credentials using `huggingface-cli`
+2. Log in with your HuggingFace account credentials using `hf`

 ```bash
-$ huggingface-cli login
+$ hf auth login
 # ...follow the prompts
 ```

--- a/examples/pytorch/image-pretraining/run_mae.py
+++ b/examples/pytorch/image-pretraining/run_mae.py
@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.54.0.dev0")
+check_min_version("4.55.0.dev0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

@ -156,7 +156,7 @@ class ModelArguments:
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+                "generated when running `hf auth login` (stored in `~/.huggingface`)."
            )
        },
    )
@ -247,7 +247,7 @@ def main():
    )

    # If we don't have a validation split, split off a percentage of train as validation.
-    data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split
+    data_args.train_val_split = None if "validation" in ds else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = ds["train"].train_test_split(data_args.train_val_split)
        ds["train"] = split["train"]
--- a/Show More
+++ b/Show More