Mirror of https://github.com/huggingface/transformers.git (synced 2025-11-01 01:24:35 +08:00)

Compare commits (7 commits)
| Author | SHA1 | Date |
|---|---|---|
| | ce334e82c9 | |
| | 02aba5d63d | |
| | 9c4b1c0a65 | |
| | 4cbb53cd31 | |
| | 8dfe70e808 | |
| | 6c087019d3 | |
| | bacc7db5ac | |

.github/workflows/benchmark.yml (vendored, 2 changes)
							| @ -28,7 +28,7 @@ jobs: | ||||
|       (github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )|| | ||||
|       (github.event_name == 'push' && github.ref == 'refs/heads/main') | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu | ||||
|       image: huggingface/transformers-pytorch-gpu | ||||
|       options: --gpus all --privileged --ipc host | ||||
|     steps: | ||||
|       - name: Get repo | ||||
|  | ||||
| @ -9,7 +9,7 @@ jobs: | ||||
|     uses: ./.github/workflows/benchmark_v2.yml | ||||
|     with: | ||||
|       runner: aws-g5-4xlarge-cache-use1-public-80 | ||||
|       container_image: huggingface/transformers-all-latest-gpu | ||||
|       container_image: huggingface/transformers-pytorch-gpu | ||||
|       container_options: --gpus all --privileged --ipc host --shm-size "16gb" | ||||
|       commit_sha: ${{ github.sha }} | ||||
|       run_id: ${{ github.run_id }} | ||||
|  | ||||
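The first hunk above switches the benchmark job's container image from `transformers-all-latest-gpu` to `transformers-pytorch-gpu`; the second hunk makes the same swap where the image is forwarded to the reusable workflow `.github/workflows/benchmark_v2.yml`. As a minimal sketch of that wiring, the called workflow would declare matching `workflow_call` inputs roughly as below. The input names mirror the caller shown in the diff; the job body is hypothetical and is not the actual content of benchmark_v2.yml.

```yaml
# Hypothetical sketch of the receiving side of the workflow_call above.
# Input names follow the caller in this diff; the job body is illustrative only.
on:
  workflow_call:
    inputs:
      runner:
        required: true
        type: string
      container_image:
        required: true
        type: string
      container_options:
        required: false
        type: string
      commit_sha:
        required: false
        type: string
      run_id:
        required: false
        type: string

jobs:
  benchmark:
    runs-on:
      group: ${{ inputs.runner }}
    container:
      image: ${{ inputs.container_image }}     # e.g. huggingface/transformers-pytorch-gpu
      options: ${{ inputs.container_options }}
    steps:
      - name: Show which image the caller selected
        run: echo "Benchmarking ${{ inputs.commit_sha }} inside ${{ inputs.container_image }}"
```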
							
								
								
									
.github/workflows/build-docker-images.yml (vendored, 182 changes)
							| @ -45,52 +45,26 @@ jobs: | ||||
|             REF=main | ||||
|           push: true | ||||
|           tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }} | ||||
|  | ||||
|       - name: Post to Slack | ||||
|         if: always() | ||||
|         uses: huggingface/hf-workflows/.github/actions/post-slack@main | ||||
|         with: | ||||
|           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} | ||||
|           title: 🤗 Results of the transformers-all-latest-gpu docker build | ||||
|           status: ${{ job.status }} | ||||
|           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|  | ||||
|   flash-attn-ci-image: | ||||
|     name: "PyTorch with Flash Attn [dev]" | ||||
|     runs-on: | ||||
|       group: aws-general-8-plus | ||||
|     steps: | ||||
|       # Push CI images still need to be re-built daily | ||||
|       - | ||||
|         name: Set up Docker Buildx | ||||
|         uses: docker/setup-buildx-action@v3 | ||||
|       - | ||||
|         name: Check out code | ||||
|         uses: actions/checkout@v4 | ||||
|       - | ||||
|         name: Login to DockerHub | ||||
|         uses: docker/login-action@v3 | ||||
|         with: | ||||
|           username: ${{ secrets.DOCKERHUB_USERNAME }} | ||||
|           password: ${{ secrets.DOCKERHUB_PASSWORD }} | ||||
|       - | ||||
|         name: Build and push | ||||
|         name: Build and push (for Push CI) in a daily basis | ||||
|         # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`. | ||||
|         # The later case is useful for manual image building for debugging purpose. Use another tag in this case! | ||||
|         if: inputs.image_postfix != '-push-ci' | ||||
|         uses: docker/build-push-action@v5 | ||||
|         with: | ||||
|           context: ./docker/transformers-all-latest-gpu | ||||
|           build-args: | | ||||
|             REF=main | ||||
|             PYTORCH=2.8.0 | ||||
|             TORCHCODEC=0.7.0 | ||||
|             FLASH_ATTN=yes | ||||
|           push: true | ||||
|           tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}:flash-attn | ||||
|           tags: huggingface/transformers-all-latest-gpu-push-ci | ||||
|  | ||||
|       - name: Post to Slack | ||||
|         if: always() | ||||
|         uses: huggingface/hf-workflows/.github/actions/post-slack@main | ||||
|         with: | ||||
|           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} | ||||
|           title: 🤗 Results of the transformers-all-latest-gpu docker build | ||||
|           title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build | ||||
|           status: ${{ job.status }} | ||||
|           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|  | ||||
| @ -130,8 +104,51 @@ jobs: | ||||
|           status: ${{ job.status }} | ||||
|           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|  | ||||
|   # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`) | ||||
|   latest-torch-deepspeed-docker-for-push-ci-daily-build: | ||||
|     name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)" | ||||
|     runs-on: | ||||
|       group: aws-general-8-plus | ||||
|     steps: | ||||
|       - | ||||
|         name: Set up Docker Buildx | ||||
|         uses: docker/setup-buildx-action@v3 | ||||
|       - | ||||
|         name: Check out code | ||||
|         uses: actions/checkout@v4 | ||||
|       - | ||||
|         name: Login to DockerHub | ||||
|         uses: docker/login-action@v3 | ||||
|         with: | ||||
|           username: ${{ secrets.DOCKERHUB_USERNAME }} | ||||
|           password: ${{ secrets.DOCKERHUB_PASSWORD }} | ||||
|       # Push CI images still need to be re-built daily | ||||
|       - | ||||
|         name: Build and push (for Push CI) in a daily basis | ||||
|         # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`. | ||||
|         # The later case is useful for manual image building for debugging purpose. Use another tag in this case! | ||||
|         if: inputs.image_postfix != '-push-ci' | ||||
|         uses: docker/build-push-action@v5 | ||||
|         with: | ||||
|           context: ./docker/transformers-pytorch-deepspeed-latest-gpu | ||||
|           build-args: | | ||||
|             REF=main | ||||
|           push: true | ||||
|           tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci | ||||
|  | ||||
|       - name: Post to Slack | ||||
|         if: always() | ||||
|         uses: huggingface/hf-workflows/.github/actions/post-slack@main | ||||
|         with: | ||||
|           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} | ||||
|           title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build | ||||
|           status: ${{ job.status }} | ||||
|           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|  | ||||
|   doc-builder: | ||||
|     name: "Doc builder" | ||||
|     # Push CI doesn't need this image | ||||
|     if: inputs.image_postfix != '-push-ci' | ||||
|     runs-on: | ||||
|       group: aws-general-8-plus | ||||
|     steps: | ||||
| @ -164,6 +181,44 @@ jobs: | ||||
|           status: ${{ job.status }} | ||||
|           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|  | ||||
|   latest-pytorch: | ||||
|     name: "Latest PyTorch [dev]" | ||||
|     # Push CI doesn't need this image | ||||
|     if: inputs.image_postfix != '-push-ci' | ||||
|     runs-on: | ||||
|       group: aws-general-8-plus | ||||
|     steps: | ||||
|       - | ||||
|         name: Set up Docker Buildx | ||||
|         uses: docker/setup-buildx-action@v3 | ||||
|       - | ||||
|         name: Check out code | ||||
|         uses: actions/checkout@v4 | ||||
|       - | ||||
|         name: Login to DockerHub | ||||
|         uses: docker/login-action@v3 | ||||
|         with: | ||||
|           username: ${{ secrets.DOCKERHUB_USERNAME }} | ||||
|           password: ${{ secrets.DOCKERHUB_PASSWORD }} | ||||
|       - | ||||
|         name: Build and push | ||||
|         uses: docker/build-push-action@v5 | ||||
|         with: | ||||
|           context: ./docker/transformers-pytorch-gpu | ||||
|           build-args: | | ||||
|             REF=main | ||||
|           push: true | ||||
|           tags: huggingface/transformers-pytorch-gpu | ||||
|  | ||||
|       - name: Post to Slack | ||||
|         if: always() | ||||
|         uses: huggingface/hf-workflows/.github/actions/post-slack@main | ||||
|         with: | ||||
|           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} | ||||
|           title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build | ||||
|           status: ${{ job.status }} | ||||
|           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|  | ||||
|   latest-pytorch-amd: | ||||
|     name: "Latest PyTorch (AMD) [dev]" | ||||
|     runs-on: | ||||
| @ -190,47 +245,29 @@ jobs: | ||||
|             REF=main | ||||
|           push: true | ||||
|           tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }} | ||||
|       # Push CI images still need to be re-built daily | ||||
|       - | ||||
|         name: Build and push (for Push CI) in a daily basis | ||||
|         # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`. | ||||
|         # The later case is useful for manual image building for debugging purpose. Use another tag in this case! | ||||
|         if: inputs.image_postfix != '-push-ci' | ||||
|         uses: docker/build-push-action@v5 | ||||
|         with: | ||||
|           context: ./docker/transformers-pytorch-amd-gpu | ||||
|           build-args: | | ||||
|             REF=main | ||||
|           push: true | ||||
|           tags: huggingface/transformers-pytorch-amd-gpu-push-ci | ||||
|  | ||||
|       - name: Post to Slack | ||||
|         if: always() | ||||
|         uses: huggingface/hf-workflows/.github/actions/post-slack@main | ||||
|         with: | ||||
|           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} | ||||
|           title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu build | ||||
|           title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build | ||||
|           status: ${{ job.status }} | ||||
|           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|  | ||||
|   cache-latest-pytorch-amd: | ||||
|     name: "Cache Latest Pytorch (AMD) Image" | ||||
|     needs: latest-pytorch-amd | ||||
|     runs-on: | ||||
|       group: amd-mi325-1gpu | ||||
|     steps: | ||||
|       - | ||||
|         name: Login to DockerHub | ||||
|         uses: docker/login-action@v3 | ||||
|         with: | ||||
|           username: ${{ secrets.DOCKERHUB_USERNAME }} | ||||
|           password: ${{ secrets.DOCKERHUB_PASSWORD }} | ||||
|          | ||||
|       -  | ||||
|         name: Pull and save docker image to cache | ||||
|         run: | | ||||
|           image="huggingface/transformers-pytorch-amd-gpu" | ||||
|           final_path="/mnt/image-cache/transformers-pytorch-amd-gpu.tar" | ||||
|           tmp_path="${final_path}.tmp" | ||||
|  | ||||
|           echo "Pulling image: ${image}" | ||||
|           docker pull "${image}" | ||||
|  | ||||
|           echo "Saving to temp file: ${tmp_path}" | ||||
|           docker save "${image}" -o "${tmp_path}" | ||||
|  | ||||
|           echo "Moving to final path: ${final_path}" | ||||
|           mv -f "${tmp_path}" "${final_path}" | ||||
|  | ||||
|           echo "Cache populated successfully at ${final_path}" | ||||
|  | ||||
|   latest-pytorch-deepspeed-amd: | ||||
|     name: "PyTorch + DeepSpeed (AMD) [dev]" | ||||
|     runs-on: | ||||
| @ -257,6 +294,19 @@ jobs: | ||||
|             REF=main | ||||
|           push: true | ||||
|           tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }} | ||||
|       # Push CI images still need to be re-built daily | ||||
|       - | ||||
|         name: Build and push (for Push CI) in a daily basis | ||||
|         # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`. | ||||
|         # The later case is useful for manual image building for debugging purpose. Use another tag in this case! | ||||
|         if: inputs.image_postfix != '-push-ci' | ||||
|         uses: docker/build-push-action@v5 | ||||
|         with: | ||||
|           context: ./docker/transformers-pytorch-deepspeed-amd-gpu | ||||
|           build-args: | | ||||
|             REF=main | ||||
|           push: true | ||||
|           tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci | ||||
|  | ||||
|       - name: Post to Slack | ||||
|         if: always() | ||||
| @ -269,6 +319,8 @@ jobs: | ||||
|  | ||||
|   latest-quantization-torch-docker: | ||||
|     name: "Latest Pytorch + Quantization [dev]" | ||||
|      # Push CI doesn't need this image | ||||
|     if: inputs.image_postfix != '-push-ci' | ||||
|     runs-on: | ||||
|       group: aws-general-8-plus | ||||
|     steps: | ||||
|  | ||||
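Throughout this file, image builds are parameterized by `inputs.image_postfix`: regular builds tag images without a suffix, the Push CI path appends `-push-ci` and is rebuilt daily, and jobs the Push CI does not need are skipped with `if: inputs.image_postfix != '-push-ci'`. The diff does not show the workflow's trigger block, so the following is only a plausible sketch of how such an input could be declared and how it composes into the image tag.

```yaml
# Plausible sketch only: the actual `on:` block of build-docker-images.yml is not part of this diff.
on:
  workflow_call:
    inputs:
      image_postfix:
        required: false
        type: string
        default: ""        # "-push-ci" when invoked by the push caller, "" otherwise
  schedule:
    - cron: "0 1 * * *"    # hypothetical daily rebuild time

jobs:
  example-image:
    # Jobs the Push CI does not need are skipped when the postfix is "-push-ci"
    if: inputs.image_postfix != '-push-ci'
    runs-on: ubuntu-22.04
    steps:
      - name: Show the tag this job would push
        run: echo "huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}"
```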
							
								
								
									
.github/workflows/model_jobs.yml (vendored, 5 changes)
							| @ -28,9 +28,6 @@ on: | ||||
|       report_repo_id: | ||||
|         required: false | ||||
|         type: string | ||||
|       pytest_marker: | ||||
|         required: false | ||||
|         type: string | ||||
|  | ||||
| env: | ||||
|   HF_HOME: /mnt/cache | ||||
| @ -140,7 +137,7 @@ jobs: | ||||
|       - name: Run all tests on GPU | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v -m '${{ inputs.pytest_marker }}' --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt | ||||
|           script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt | ||||
|           ls -la | ||||
|           # Extract the exit code from the output file | ||||
|           EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2) | ||||
|  | ||||
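The test command above is wrapped in `script -q -c "…" test_outputs.txt`, and the step recovers the real exit status from the trailer line that recent util-linux `script` versions append (e.g. `Script done on … [COMMAND_EXIT_CODE="1"]`), since `script` does not propagate the child's exit code unless `--return` is passed. A minimal standalone illustration of that capture pattern, using a throwaway command rather than the pytest invocation:

```yaml
# Illustration of the exit-code capture used above, with a dummy command that exits 3.
- name: Run a command under `script` and recover its exit code
  shell: bash
  run: |
    # Without --return, `script` may exit 0 even if the wrapped command fails,
    # so the status is read back from the COMMAND_EXIT_CODE trailer instead.
    script -q -c "python3 -c 'import sys; sys.exit(3)'" cmd_output.txt || true
    EXIT_CODE=$(tail -1 cmd_output.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2)
    echo "captured exit code: ${EXIT_CODE:-unknown}"
    exit ${EXIT_CODE:-1}
```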
							
								
								
									
.github/workflows/push-important-models.yml (vendored, 2 changes)
							| @ -149,7 +149,7 @@ jobs: | ||||
|     with: | ||||
|       job: run_models_gpu | ||||
|       slack_report_channel: "#transformers-ci-push" | ||||
|       docker: huggingface/transformers-all-latest-gpu:flash-attn | ||||
|       docker: huggingface/transformers-all-latest-gpu | ||||
|       ci_event: push | ||||
|       report_repo_id: hf-internal-testing/transformers_ci_push | ||||
|       commit_sha: ${{ github.sha }} | ||||
|  | ||||
							
								
								
									
.github/workflows/self-push-amd-mi210-caller.yml (vendored, new file, 25 lines)
							| @ -0,0 +1,25 @@ | ||||
| name: Self-hosted runner (AMD mi210 CI caller) | ||||
|  | ||||
| on: | ||||
|   #workflow_run: | ||||
|   #  workflows: ["Self-hosted runner (push-caller)"] | ||||
|   #  branches: ["main"] | ||||
|   #  types: [completed] | ||||
|   push: | ||||
|     branches: | ||||
|       - run_amd_push_ci_caller* | ||||
|     paths: | ||||
|       - "src/**" | ||||
|       - "tests/**" | ||||
|       - ".github/**" | ||||
|       - "templates/**" | ||||
|       - "utils/**" | ||||
|  | ||||
| jobs: | ||||
|   run_amd_ci: | ||||
|     name: AMD mi210 | ||||
|     if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) | ||||
|     uses: ./.github/workflows/self-push-amd.yml | ||||
|     with: | ||||
|       gpu_flavor: mi210 | ||||
|     secrets: inherit | ||||
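This caller (and the mi250 one in the next file) is deliberately thin: everything flavor-specific is reduced to the `gpu_flavor` value handed to the shared `self-push-amd.yml` workflow, which uses it as a runner label. Supporting another flavor would just mean adding one more caller of the same shape; the example below is hypothetical and not part of this changeset.

```yaml
# Hypothetical additional caller (not in this diff) showing how another flavor would be wired.
name: Self-hosted runner (AMD mi300 CI caller)

on:
  push:
    branches:
      - run_amd_push_ci_caller*

jobs:
  run_amd_ci:
    name: AMD mi300
    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
    uses: ./.github/workflows/self-push-amd.yml
    with:
      gpu_flavor: mi300
    secrets: inherit
```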
							
								
								
									
.github/workflows/self-push-amd-mi250-caller.yml (vendored, new file, 25 lines)
							| @ -0,0 +1,25 @@ | ||||
| name: Self-hosted runner (AMD mi250 CI caller) | ||||
|  | ||||
| on: | ||||
|   #workflow_run: | ||||
|   #  workflows: ["Self-hosted runner (push-caller)"] | ||||
|   #  branches: ["main"] | ||||
|   #  types: [completed] | ||||
|   push: | ||||
|     branches: | ||||
|       - run_amd_push_ci_caller* | ||||
|     paths: | ||||
|       - "src/**" | ||||
|       - "tests/**" | ||||
|       - ".github/**" | ||||
|       - "templates/**" | ||||
|       - "utils/**" | ||||
|  | ||||
| jobs: | ||||
|   run_amd_ci: | ||||
|     name: AMD mi250 | ||||
|     if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) | ||||
|     uses: ./.github/workflows/self-push-amd.yml | ||||
|     with: | ||||
|       gpu_flavor: mi250 | ||||
|     secrets: inherit | ||||
							
								
								
									
.github/workflows/self-push-amd.yml (vendored, new file, 334 lines)
							| @ -0,0 +1,334 @@ | ||||
| name: Self-hosted runner AMD GPU (push) | ||||
|  | ||||
| on: | ||||
|   workflow_call: | ||||
|     inputs: | ||||
|       gpu_flavor: | ||||
|         required: true | ||||
|         type: string | ||||
|  | ||||
| env: | ||||
|   HF_HOME: /mnt/cache | ||||
|   TRANSFORMERS_IS_CI: yes | ||||
|   OMP_NUM_THREADS: 8 | ||||
|   MKL_NUM_THREADS: 8 | ||||
|   PYTEST_TIMEOUT: 60 | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|  | ||||
| jobs: | ||||
|   check_runner_status: | ||||
|     name: Check Runner Status | ||||
|     runs-on: ubuntu-22.04 | ||||
|     steps: | ||||
|       - name: Checkout transformers | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           fetch-depth: 2 | ||||
|  | ||||
|       - name: Check Runner Status | ||||
|         run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} | ||||
|  | ||||
|   check_runners: | ||||
|     name: Check Runners | ||||
|     needs: check_runner_status | ||||
|     strategy: | ||||
|       matrix: | ||||
|         machine_type: [single-gpu, multi-gpu] | ||||
|     runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] | ||||
|     container: | ||||
|       image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now | ||||
|       options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     steps: | ||||
|       - name: ROCM-SMI | ||||
|         run: | | ||||
|           rocm-smi | ||||
|       - name: ROCM-INFO | ||||
|         run: | | ||||
|           rocminfo  | grep "Agent" -A 14 | ||||
|       - name: Show ROCR environment | ||||
|         run: | | ||||
|           echo "ROCR: $ROCR_VISIBLE_DEVICES" | ||||
|  | ||||
|   setup_gpu: | ||||
|     name: Setup | ||||
|     needs: check_runners | ||||
|     strategy: | ||||
|       matrix: | ||||
|         machine_type: [single-gpu, multi-gpu] | ||||
|     runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] | ||||
|     container: | ||||
|       image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now | ||||
|       options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     outputs: | ||||
|       matrix: ${{ steps.set-matrix.outputs.matrix }} | ||||
|       test_map: ${{ steps.set-matrix.outputs.test_map }} | ||||
|     env: | ||||
|       # `CI_BRANCH_PUSH`: The branch name from the push event | ||||
|       # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event | ||||
|       # `CI_SHA_PUSH`: The commit SHA from the push event | ||||
|       # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) | ||||
|         # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - name: Cleanup | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           rm -rf tests/__pycache__ | ||||
|           rm -rf tests/models/__pycache__ | ||||
|           rm -rf reports | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         working-directory: /transformers | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Fetch the tests to run | ||||
|         working-directory: /transformers | ||||
|         # TODO: add `git-python` in the docker images | ||||
|         run: | | ||||
|           pip install --upgrade git-python | ||||
|           python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt | ||||
|  | ||||
|       - name: Report fetched tests | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: test_fetched | ||||
|           path: /transformers/test_preparation.txt | ||||
|  | ||||
|       - id: set-matrix | ||||
|         name: Organize tests into models | ||||
|         working-directory: /transformers | ||||
|         # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc. | ||||
|         # The `test_map` is used to get the actual identified test files under each key. | ||||
|         # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail) | ||||
|         run: | | ||||
|           if [ -f test_map.json ]; then | ||||
|               keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)') | ||||
|               test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)') | ||||
|           else | ||||
|               keys=$(python3 -c 'keys = ["dummy"]; print(keys)') | ||||
|               test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)') | ||||
|           fi | ||||
|           echo $keys | ||||
|           echo $test_map | ||||
|           echo "matrix=$keys" >> $GITHUB_OUTPUT | ||||
|           echo "test_map=$test_map" >> $GITHUB_OUTPUT | ||||
|  | ||||
|   run_models_gpu: | ||||
|     name: Model tests | ||||
|     needs: setup_gpu | ||||
|     # `dummy` means there is no test to run | ||||
|     if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} | ||||
|         machine_type: [single-gpu, multi-gpu] | ||||
|     runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] | ||||
|     container: | ||||
|       image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now | ||||
|       options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /transformers | ||||
|         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . | ||||
|  | ||||
|       - name: Echo folder ${{ matrix.folders }} | ||||
|         shell: bash | ||||
|         # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to | ||||
|         # set the artifact folder names (because the character `/` is not allowed). | ||||
|         run: | | ||||
|           echo "${{ matrix.folders }}" | ||||
|           echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}" | ||||
|           matrix_folders=${{ matrix.folders }} | ||||
|           matrix_folders=${matrix_folders/'models/'/'models_'} | ||||
|           echo "$matrix_folders" | ||||
|           echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: ROCM-SMI | ||||
|         run: | | ||||
|           rocm-smi | ||||
|       - name: ROCM-INFO | ||||
|         run: | | ||||
|           rocminfo  | grep "Agent" -A 14 | ||||
|       - name: Show ROCR environment | ||||
|         run: | | ||||
|           echo "ROCR: $ROCR_VISIBLE_DEVICES" | ||||
|  | ||||
|       - name: Environment | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           python3 utils/print_env.py | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         working-directory: /transformers | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Run all non-slow selected tests on GPU | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test" | ||||
|  | ||||
|       - name: Failure short reports | ||||
|         if: ${{ failure() }} | ||||
|         continue-on-error: true | ||||
|         run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports | ||||
|           path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports | ||||
|  | ||||
|   send_results: | ||||
|     name: Send results to webhook | ||||
|     runs-on: ubuntu-22.04 | ||||
|     if: always() | ||||
|     needs: [ | ||||
|         check_runner_status, | ||||
|         check_runners, | ||||
|         setup_gpu, | ||||
|         run_models_gpu, | ||||
| #        run_tests_torch_cuda_extensions_single_gpu, | ||||
| #        run_tests_torch_cuda_extensions_multi_gpu | ||||
|     ] | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       - name: Preliminary job status | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           echo "Runner availability: ${{ needs.check_runner_status.result }}" | ||||
|           echo "Setup status: ${{ needs.setup_gpu.result }}" | ||||
|           echo "Runner status: ${{ needs.check_runners.result }}" | ||||
|  | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - uses: actions/checkout@v4 | ||||
|         # To avoid failure when multiple commits are merged into `main` in a short period of time. | ||||
|         # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ... | ||||
|         # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit) | ||||
|         with: | ||||
|           fetch-depth: 20 | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - uses: actions/download-artifact@v4 | ||||
|       - name: Send message to Slack | ||||
|         env: | ||||
|           CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} | ||||
|           CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} | ||||
|           CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} | ||||
|           CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} | ||||
|           CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} | ||||
|           CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} | ||||
|           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} | ||||
|           CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }} | ||||
|           CI_TITLE_PUSH: ${{ github.event.head_commit.message }} | ||||
|           CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} | ||||
|           CI_SHA: ${{ env.CI_SHA }} | ||||
|           RUNNER_STATUS: ${{ needs.check_runner_status.result }} | ||||
|           RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} | ||||
|           SETUP_STATUS: ${{ needs.setup_gpu.result }} | ||||
|  | ||||
|         # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change | ||||
|         # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. | ||||
|         run: | | ||||
|           pip install huggingface_hub | ||||
|           pip install slack_sdk | ||||
|           pip show slack_sdk | ||||
|           python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}" | ||||
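The "Organize tests into models" step in this workflow turns `test_map.json` into two job outputs via inline `python3 -c` one-liners: `matrix` (the list of keys such as `models/bert`) and `test_map` (the full mapping), with a `dummy` fallback when there is nothing to run. An equivalent sketch of that step using `jq` follows; it assumes `jq` is available in the CI image, which this diff does not establish, and it emits strict JSON, which the downstream `fromJson` calls accept.

```yaml
# Equivalent sketch of the set-matrix step using jq; assumes jq exists in the container.
- id: set-matrix
  name: Organize tests into models (jq variant)
  working-directory: /transformers
  run: |
    if [ -f test_map.json ]; then
        keys=$(jq -c 'keys_unsorted' test_map.json)   # e.g. ["models/bert","pipelines"]
        test_map=$(jq -c '.' test_map.json)           # full mapping, compacted to one line
    else
        # No tests to run: keep the matrix non-empty so the job definition stays valid.
        keys='["dummy"]'
        test_map='{"dummy": []}'
    fi
    echo "$keys"
    echo "$test_map"
    echo "matrix=$keys" >> $GITHUB_OUTPUT
    echo "test_map=$test_map" >> $GITHUB_OUTPUT
```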
							
								
								
									
.github/workflows/self-push-caller.yml (vendored, new file, 54 lines)
							| @ -0,0 +1,54 @@ | ||||
| # Used to trigger self-push CI | ||||
| name: Self-hosted runner (push-caller) | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|     paths: | ||||
|       - "src/**" | ||||
|       - "tests/**" | ||||
|       - ".github/**" | ||||
|       - "templates/**" | ||||
|       - "utils/**" | ||||
|  | ||||
| jobs: | ||||
|   check-for-setup: | ||||
|       runs-on: ubuntu-22.04 | ||||
|       name: Check if setup was changed | ||||
|       outputs: | ||||
|         changed: ${{ steps.was_changed.outputs.changed }} | ||||
|       steps: | ||||
|         - uses: actions/checkout@v4 | ||||
|           with:  | ||||
|             fetch-depth: "2" | ||||
|          | ||||
|         - name: Get changed files | ||||
|           id: changed-files | ||||
|           uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c | ||||
|          | ||||
|         - name: Was setup changed  | ||||
|           id: was_changed | ||||
|           run: | | ||||
|             for file in ${{ steps.changed-files.outputs.all_changed_files }}; do | ||||
|               if [ `basename "${file}"` = "setup.py" ]; then | ||||
|                 echo "changed=1" >> $GITHUB_OUTPUT | ||||
|               fi | ||||
|             done | ||||
|  | ||||
|   build-docker-containers: | ||||
|     needs: check-for-setup | ||||
|     if: (github.event_name == 'push') && (needs.check-for-setup.outputs.changed == '1') | ||||
|     uses: ./.github/workflows/build-docker-images.yml | ||||
|     with: | ||||
|       image_postfix: "-push-ci" | ||||
|     secrets: inherit | ||||
|  | ||||
|   run_push_ci: | ||||
|     name: Trigger Push CI | ||||
|     runs-on: ubuntu-22.04 | ||||
|     if: ${{ always() }} | ||||
|     needs: build-docker-containers | ||||
|     steps: | ||||
|       - name: Trigger push CI via workflow_run | ||||
|         run: echo "Trigger push CI via workflow_run" | ||||
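The `check-for-setup` job above gates the `-push-ci` docker rebuild on whether `setup.py` was touched, using the pinned `tj-actions/changed-files` action over a `fetch-depth: 2` checkout. A rough equivalent with plain git is sketched below; note it only inspects the last commit (`HEAD~1..HEAD`), which is a simplification compared with the action's handling of multi-commit pushes.

```yaml
# Rough git-only equivalent of the "Was setup changed" check; relies on the
# fetch-depth: 2 checkout above so that HEAD~1 is available.
- name: Was setup changed (git variant)
  id: was_changed
  run: |
    if git diff --name-only HEAD~1 HEAD | grep -qx "setup.py"; then
      echo "changed=1" >> $GITHUB_OUTPUT
    fi
```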
							
								
								
									
.github/workflows/self-push.yml (vendored, new file, 652 lines)
							| @ -0,0 +1,652 @@ | ||||
| name: Self-hosted runner (push) | ||||
|  | ||||
| on: | ||||
|   workflow_run: | ||||
|     workflows: ["Self-hosted runner (push-caller)"] | ||||
|     branches: ["main"] | ||||
|     types: [completed] | ||||
|   push: | ||||
|     branches: | ||||
|       - ci_* | ||||
|       - ci-* | ||||
|     paths: | ||||
|       - "src/**" | ||||
|       - "tests/**" | ||||
|       - ".github/**" | ||||
|       - "templates/**" | ||||
|       - "utils/**" | ||||
|   repository_dispatch: | ||||
|  | ||||
| env: | ||||
|   HF_HOME: /mnt/cache | ||||
|   TRANSFORMERS_IS_CI: yes | ||||
|   OMP_NUM_THREADS: 8 | ||||
|   MKL_NUM_THREADS: 8 | ||||
|   PYTEST_TIMEOUT: 60 | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|   CUDA_VISIBLE_DEVICES: 0,1 | ||||
|  | ||||
| jobs: | ||||
|   setup: | ||||
|     name: Setup | ||||
|     strategy: | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu-push-ci | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     outputs: | ||||
|       matrix: ${{ steps.set-matrix.outputs.matrix }} | ||||
|       test_map: ${{ steps.set-matrix.outputs.test_map }} | ||||
|     env: | ||||
|       # `CI_BRANCH_PUSH`: The branch name from the push event | ||||
|       # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event | ||||
|       # `CI_SHA_PUSH`: The commit SHA from the push event | ||||
|       # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) | ||||
|         # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - name: Cleanup | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           rm -rf tests/__pycache__ | ||||
|           rm -rf tests/models/__pycache__ | ||||
|           rm -rf reports | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         working-directory: /transformers | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Fetch the tests to run | ||||
|         working-directory: /transformers | ||||
|         # TODO: add `git-python` in the docker images | ||||
|         run: | | ||||
|           pip install --upgrade git-python | ||||
|           python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt | ||||
|  | ||||
|       - name: Report fetched tests | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: test_fetched | ||||
|           path: /transformers/test_preparation.txt | ||||
|  | ||||
|       - id: set-matrix | ||||
|         name: Organize tests into models | ||||
|         working-directory: /transformers | ||||
|         # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc. | ||||
|         # The `test_map` is used to get the actual identified test files under each key. | ||||
|         # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail) | ||||
|         run: | | ||||
|           if [ -f test_map.json ]; then | ||||
|               keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)') | ||||
|               test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)') | ||||
|           else | ||||
|               keys=$(python3 -c 'keys = ["dummy"]; print(keys)') | ||||
|               test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)') | ||||
|           fi | ||||
|           echo $keys | ||||
|           echo $test_map | ||||
|           echo "matrix=$keys" >> $GITHUB_OUTPUT | ||||
|           echo "test_map=$test_map" >> $GITHUB_OUTPUT | ||||
|  | ||||
|   run_tests_single_gpu: | ||||
|     name: Model tests | ||||
|     needs: setup | ||||
|     # `dummy` means there is no test to run | ||||
|     if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(needs.setup.outputs.matrix) }} | ||||
|         machine_type: [aws-g5-4xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu-push-ci | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - name: Set `machine_type` for report and artifact names | ||||
|         working-directory: /transformers | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
|           fi | ||||
|  | ||||
|           echo "$machine_type" | ||||
|           echo "machine_type=$machine_type" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /transformers | ||||
|         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . | ||||
|  | ||||
|       - name: Echo folder ${{ matrix.folders }} | ||||
|         shell: bash | ||||
|         # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to | ||||
|         # set the artifact folder names (because the character `/` is not allowed). | ||||
|         run: | | ||||
|           echo "${{ matrix.folders }}" | ||||
|           echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}" | ||||
|           matrix_folders=${{ matrix.folders }} | ||||
|           matrix_folders=${matrix_folders/'models/'/'models_'} | ||||
|           echo "$matrix_folders" | ||||
|           echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: NVIDIA-SMI | ||||
|         run: | | ||||
|           nvidia-smi | ||||
|  | ||||
|       - name: Environment | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           python3 utils/print_env.py | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         working-directory: /transformers | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Run all non-slow selected tests on GPU | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }} | ||||
|  | ||||
|       - name: Failure short reports | ||||
|         if: ${{ failure() }} | ||||
|         continue-on-error: true | ||||
|         run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports | ||||
|           path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} | ||||
|  | ||||
|   run_tests_multi_gpu: | ||||
|     name: Model tests | ||||
|     needs: setup | ||||
|     # `dummy` means there is no test to run | ||||
|     if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(needs.setup.outputs.matrix) }} | ||||
|         machine_type: [aws-g5-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu-push-ci | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - name: Set `machine_type` for report and artifact names | ||||
|         working-directory: /transformers | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
|           fi | ||||
|  | ||||
|           echo "$machine_type" | ||||
|           echo "machine_type=$machine_type" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /transformers | ||||
|         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . | ||||
|  | ||||
|       - name: Echo folder ${{ matrix.folders }} | ||||
|         shell: bash | ||||
|         # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to | ||||
|         # set the artifact folder names (because the character `/` is not allowed). | ||||
|         run: | | ||||
|           echo "${{ matrix.folders }}" | ||||
|           echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}" | ||||
|           matrix_folders=${{ matrix.folders }} | ||||
|           matrix_folders=${matrix_folders/'models/'/'models_'} | ||||
|           echo "$matrix_folders" | ||||
|           echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: NVIDIA-SMI | ||||
|         run: | | ||||
|           nvidia-smi | ||||
|  | ||||
|       - name: Environment | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           python3 utils/print_env.py | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         working-directory: /transformers | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Run all non-slow selected tests on GPU | ||||
|         env: | ||||
|           MKL_SERVICE_FORCE_INTEL: 1 | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }} | ||||
|  | ||||
|       - name: Failure short reports | ||||
|         if: ${{ failure() }} | ||||
|         continue-on-error: true | ||||
|         run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports | ||||
|           path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} | ||||
|  | ||||
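For readers unfamiliar with the `Prepare custom environment variables` pattern repeated in the jobs above: it strips the `refs/heads/` prefix from the `push` ref and falls back to the `workflow_run` values whenever the `push` fields are empty. A condensed, standalone sketch of that logic, runnable outside Actions (all variable values here are placeholders, not taken from a real event):

```bash
#!/usr/bin/env bash
# Stand-ins for the values GitHub injects; on a `workflow_run` event the *_PUSH fields are empty.
CI_BRANCH_PUSH="refs/heads/my-feature-branch"
CI_BRANCH_WORKFLOW_RUN="main"
CI_SHA_PUSH=""
CI_SHA_WORKFLOW_RUN="0123abcd"
GITHUB_ENV="$(mktemp)"   # locally, emulate the Actions environment file

# Drop the `refs/heads/` prefix so only the branch name remains.
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}

# Prefer the `push` values; fall back to the `workflow_run` values when they are empty.
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV" || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV" || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"

cat "$GITHUB_ENV"   # CI_BRANCH=my-feature-branch, CI_SHA=0123abcd
```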
|   run_tests_torch_cuda_extensions_single_gpu: | ||||
|     name: Torch CUDA extension tests | ||||
|     needs: setup | ||||
|     if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended') | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - name: Set `machine_type` for report and artifact names | ||||
|         working-directory: /workspace/transformers | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
|           fi | ||||
|  | ||||
|           echo "$machine_type" | ||||
|           echo "machine_type=$machine_type" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         working-directory: /workspace/transformers | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /workspace/transformers | ||||
|         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . | ||||
|  | ||||
|       - name: Remove cached torch extensions | ||||
|         run: rm -rf /github/home/.cache/torch_extensions/ | ||||
|  | ||||
|       # To avoid unknown test failures | ||||
|       - name: Pre build DeepSpeed *again* | ||||
|         working-directory: /workspace | ||||
|         run: | | ||||
|           python3 -m pip uninstall -y deepspeed | ||||
|           DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check | ||||
|  | ||||
|       - name: NVIDIA-SMI | ||||
|         run: | | ||||
|           nvidia-smi | ||||
|  | ||||
|       - name: Environment | ||||
|         working-directory: /workspace/transformers | ||||
|         run: | | ||||
|           python utils/print_env.py | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         working-directory: /workspace/transformers | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Run all non-slow selected tests on GPU | ||||
|         working-directory: /workspace/transformers | ||||
|         # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests. | ||||
|         run: | | ||||
|           python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended | ||||
|  | ||||
|       - name: Failure short reports | ||||
|         if: ${{ failure() }} | ||||
|         continue-on-error: true | ||||
|         run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports | ||||
|           path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports | ||||
|  | ||||
|   run_tests_torch_cuda_extensions_multi_gpu: | ||||
|     name: Torch CUDA extension tests | ||||
|     needs: setup | ||||
|     if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended') | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - name: Set `machine_type` for report and artifact names | ||||
|         working-directory: /workspace/transformers | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
|           fi | ||||
|  | ||||
|           echo "$machine_type" | ||||
|           echo "machine_type=$machine_type" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         working-directory: /workspace/transformers | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /workspace/transformers | ||||
|         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . | ||||
|  | ||||
|       - name: Remove cached torch extensions | ||||
|         run: rm -rf /github/home/.cache/torch_extensions/ | ||||
|  | ||||
|       # To avoid unknown test failures | ||||
|       - name: Pre build DeepSpeed *again* | ||||
|         working-directory: /workspace | ||||
|         run: | | ||||
|           python3 -m pip uninstall -y deepspeed | ||||
|           DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check | ||||
|  | ||||
|       - name: NVIDIA-SMI | ||||
|         run: | | ||||
|           nvidia-smi | ||||
|  | ||||
|       - name: Environment | ||||
|         working-directory: /workspace/transformers | ||||
|         run: | | ||||
|           python utils/print_env.py | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         working-directory: /workspace/transformers | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Run all non-slow selected tests on GPU | ||||
|         working-directory: /workspace/transformers | ||||
|         # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests. | ||||
|         run: | | ||||
|           python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended | ||||
|  | ||||
|       - name: Failure short reports | ||||
|         if: ${{ failure() }} | ||||
|         continue-on-error: true | ||||
|         run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports | ||||
|           path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports | ||||
|  | ||||
|   send_results: | ||||
|     name: Send results to webhook | ||||
|     runs-on: ubuntu-22.04 | ||||
|     if: always() | ||||
|     needs: [ | ||||
|         setup, | ||||
|         run_tests_single_gpu, | ||||
|         run_tests_multi_gpu, | ||||
|         run_tests_torch_cuda_extensions_single_gpu, | ||||
|         run_tests_torch_cuda_extensions_multi_gpu | ||||
|     ] | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
|       CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} | ||||
|       CI_SHA_PUSH: ${{ github.event.head_commit.id }} | ||||
|       CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} | ||||
|     steps: | ||||
|       - name: Preliminary job status | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           echo "Setup status: ${{ needs.setup.result }}" | ||||
|  | ||||
|       # Necessary to get the correct branch name and commit SHA for `workflow_run` event | ||||
|       # We also take into account the `push` event (we might want to test some changes in a branch) | ||||
|       - name: Prepare custom environment variables | ||||
|         shell: bash | ||||
|         # For the meaning of these environment variables, see the job `Setup` | ||||
|         run: | | ||||
|           CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} | ||||
|           echo $CI_BRANCH_PUSH | ||||
|           echo $CI_BRANCH_WORKFLOW_RUN | ||||
|           echo $CI_SHA_PUSH | ||||
|           echo $CI_SHA_WORKFLOW_RUN | ||||
|           [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|           [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: print environment variables | ||||
|         run: | | ||||
|           echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" | ||||
|           echo "env.CI_SHA = ${{ env.CI_SHA }}" | ||||
|  | ||||
|       - uses: actions/checkout@v4 | ||||
|         # To avoid failure when multiple commits are merged into `main` in a short period of time. | ||||
|         # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ... | ||||
|         # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit) | ||||
|         with: | ||||
|           fetch-depth: 20 | ||||
|  | ||||
|       - name: Update clone using environment variables | ||||
|         run: | | ||||
|           echo "original branch = $(git branch --show-current)" | ||||
|           git fetch && git checkout ${{ env.CI_BRANCH }} | ||||
|           echo "updated branch = $(git branch --show-current)" | ||||
|           git checkout ${{ env.CI_SHA }} | ||||
|           echo "log = $(git log -n 1)" | ||||
|  | ||||
|       - uses: actions/download-artifact@v4 | ||||
|       - name: Send message to Slack | ||||
|         env: | ||||
|           CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} | ||||
|           CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} | ||||
|           CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} | ||||
|           CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} | ||||
|           CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} | ||||
|           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} | ||||
|           CI_EVENT: push | ||||
|           CI_TITLE_PUSH: ${{ github.event.head_commit.message }} | ||||
|           CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} | ||||
|           CI_SHA: ${{ env.CI_SHA }} | ||||
|           SETUP_STATUS: ${{ needs.setup.result }} | ||||
|  | ||||
|         # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change | ||||
|         # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. | ||||
|         run: | | ||||
|           pip install huggingface_hub | ||||
|           pip install slack_sdk | ||||
|           pip show slack_sdk | ||||
|           python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" | ||||
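The comment before the final step notes that `notification_service.py` has to map test folders such as `models/bert` to the `models_bert` form used in artifact names, since `/` is not allowed there. A minimal bash illustration of that mapping, using the same substitution as the `Echo folder` step earlier in this workflow (the folder names are examples only):

```bash
for folder in "models/bert" "models/vit" "pipelines"; do
  # Replace the `models/` prefix with `models_`, matching the artifact naming scheme.
  normalized=${folder/'models/'/'models_'}
  echo "$folder -> $normalized"
done
# models/bert -> models_bert
# models/vit  -> models_vit
# pipelines   -> pipelines
```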
2  .github/workflows/self-scheduled-caller.yml  vendored
							| @ -63,7 +63,7 @@ jobs: | ||||
|     with: | ||||
|       job: run_pipelines_torch_gpu | ||||
|       slack_report_channel: "#transformers-ci-daily-pipeline-torch" | ||||
|       docker: huggingface/transformers-all-latest-gpu | ||||
|       docker: huggingface/transformers-pytorch-gpu | ||||
|       ci_event: Daily CI | ||||
|       report_repo_id: hf-internal-testing/transformers_daily_ci | ||||
|       commit_sha: ${{ github.sha }} | ||||
|  | ||||
| @ -1,60 +0,0 @@ | ||||
| name: Nvidia CI - Flash Attn | ||||
|  | ||||
| on: | ||||
|   repository_dispatch: | ||||
|   schedule: | ||||
|     - cron: "17 2 * * *" | ||||
|   push: | ||||
|     branches: | ||||
|       - run_nvidia_ci_flash_attn* | ||||
|   workflow_dispatch: | ||||
|     inputs: | ||||
|       prev_workflow_run_id: | ||||
|         description: 'previous workflow run id to compare' | ||||
|         type: string | ||||
|         required: false | ||||
|         default: "" | ||||
|       other_workflow_run_id: | ||||
|         description: 'other workflow run id to compare' | ||||
|         type: string | ||||
|         required: false | ||||
|         default: "" | ||||
|  | ||||
|  | ||||
| # Used for `push` to easily modify the target workflow runs to compare against | ||||
| env: | ||||
|     prev_workflow_run_id: "" | ||||
|     other_workflow_run_id: "" | ||||
|  | ||||
|  | ||||
| jobs: | ||||
|   setup: | ||||
|     name: Setup | ||||
|     runs-on: ubuntu-22.04 | ||||
|     steps: | ||||
|       - name: Setup | ||||
|         run: | | ||||
|           mkdir "setup_values" | ||||
|           echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt" | ||||
|           echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt" | ||||
|  | ||||
|       - name: Upload artifacts | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: setup_values | ||||
|           path: setup_values | ||||
|  | ||||
|  | ||||
|   model-ci: | ||||
|     name: Model CI | ||||
|     uses: ./.github/workflows/self-scheduled.yml | ||||
|     with: | ||||
|       job: run_models_gpu | ||||
|       slack_report_channel: "#transformers-ci-flash-attn" | ||||
|       docker: huggingface/transformers-all-latest-gpu:flash-attn | ||||
|       ci_event: Daily CI | ||||
|       runner_type: "a10" | ||||
|       report_repo_id: hf-internal-testing/transformers_flash_attn_ci | ||||
|       commit_sha: ${{ github.sha }} | ||||
|       pytest_marker: "flash_attn_test or flash_attn_3_test" | ||||
|     secrets: inherit | ||||
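The deleted caller above passed `pytest_marker: "flash_attn_test or flash_attn_3_test"` down to the reusable workflow, and the next hunk removes the corresponding `pytest_marker` input. A marker string of that shape is typically consumed as a pytest `-m` expression; how exactly the reusable workflow applied it is not shown here, so the following is only an illustration (the test path is a placeholder):

```bash
# Select only tests carrying either marker; everything else is deselected.
python3 -m pytest -m "flash_attn_test or flash_attn_3_test" tests/models/llama -v
```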
7  .github/workflows/self-scheduled.yml  vendored
							| @ -38,10 +38,6 @@ on: | ||||
|         default: "" | ||||
|         required: false | ||||
|         type: string | ||||
|       pytest_marker: | ||||
|         required: false | ||||
|         type: string | ||||
|  | ||||
|  | ||||
| env: | ||||
|   HF_HOME: /mnt/cache | ||||
| @ -131,7 +127,6 @@ jobs: | ||||
|       commit_sha: ${{ inputs.commit_sha || github.sha }} | ||||
|       runner_type: ${{ inputs.runner_type }} | ||||
|       report_repo_id: ${{ inputs.report_repo_id }} | ||||
|       pytest_marker: ${{ inputs.pytest_marker }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   run_trainer_and_fsdp_gpu: | ||||
| @ -165,7 +160,7 @@ jobs: | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu | ||||
|       image: huggingface/transformers-pytorch-gpu | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     steps: | ||||
|       - name: Update clone | ||||
|  | ||||
13  .github/workflows/ssh-runner.yml  vendored
							| @ -41,9 +41,9 @@ jobs: | ||||
|           elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "t4" ]]; then | ||||
|             echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV | ||||
|           elif [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then | ||||
|             echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV | ||||
|             echo "RUNNER=aws-g5-4xlarge-cache-ssh-use2" >> $GITHUB_ENV | ||||
|           elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "a10" ]]; then | ||||
|             echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV | ||||
|             echo "RUNNER=aws-g5-12xlarge-cache-ssh" >> $GITHUB_ENV | ||||
|           else | ||||
|             echo "RUNNER=" >> $GITHUB_ENV | ||||
|           fi | ||||
| @ -106,7 +106,7 @@ jobs: | ||||
|           else | ||||
|             echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV | ||||
|           fi | ||||
|  | ||||
|          | ||||
|       - name: Tailscale # In order to be able to SSH when a test fails | ||||
|         uses: huggingface/tailscale-action@main | ||||
|         with: | ||||
| @ -115,3 +115,10 @@ jobs: | ||||
|           slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|           waitForSSH: true | ||||
|           sshTimeout: 15m | ||||
|           version: '1.90.3' | ||||
|           sha256sum: '96411140a11ccdfff6243b88e3f84692e1f176990050fb9f43a53970c0873f31' | ||||
|  | ||||
|       - name: wait2 | ||||
|         if: success() || failure() | ||||
|         shell: bash | ||||
|         run: sleep 15m | ||||
|  | ||||
| @ -14,7 +14,7 @@ This AGENTS.md file provides guidance for code agents working with this codebase | ||||
|  | ||||
| - PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff. | ||||
| - When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model. | ||||
| - Code style is enforced in the CI. You can install the style tools with `pip install -e ".[quality]"`. You can then run `make fixup` to apply style and consistency fixes to your code. | ||||
| - Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code. | ||||
|  | ||||
| ## Copying and inheritance | ||||
|  | ||||
| @ -36,4 +36,4 @@ After making changes, you should usually run `make fixup` to ensure any copies a | ||||
| the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py` | ||||
| If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`. | ||||
|  | ||||
| In order to run tests, you may need to install dependencies. You can do this with `pip install -e ".[testing]"`. You will probably also need to `pip install torch accelerate` if your environment does not already have them. | ||||
| In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them. | ||||
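Taken together, the AGENTS.md guidance in this hunk amounts to a short local loop. A sketch of it, assuming an editable clone of the repository and using `bert` purely as an example model name:

```bash
# Style and test extras (quote ".[...]" instead if your shell expands square brackets).
pip install -e .[quality]
pip install -e .[testing]
pip install torch accelerate   # only if they are not already in the environment

# Apply style and consistency fixes, then run the tests for the model you touched.
make fixup
pytest tests/models/bert/test_modeling_bert.py
```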
| @ -9,12 +9,6 @@ In this list, we showcase incredibly impactful and novel projects that have push | ||||
| adding other projects to the list. If you believe a project should be here and it's not, then please, open a PR | ||||
| to add it. | ||||
|  | ||||
| ## [◉ Universal Intelligence](https://github.com/blueraai/universal-intelligence) | ||||
|  | ||||
| [Universal Intelligence](https://github.com/blueraai/universal-intelligence) aims to standardize models, tools, and agents —transforming them into simple, composable, portable, interoperable, framework-agnostic, hardware-agnostic interfaces (through auto-negotiation and resource sharing); for fast and accessible development of AI applications. | ||||
|  | ||||
| Keywords: Protocol, Open-source, LLMs, Large Language Models, Agents, Low-code | ||||
|  | ||||
| ## [gpt4all](https://github.com/nomic-ai/gpt4all) | ||||
|  | ||||
| [gpt4all](https://github.com/nomic-ai/gpt4all) is an ecosystem of open-source chatbots trained on massive collections of clean assistant data including code, stories and dialogue. It offers open-source, large language models such as LLaMA and GPT-J trained in an assistant-style. | ||||
|  | ||||
| @ -9,15 +9,10 @@ SHELL ["sh", "-lc"] | ||||
| # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant | ||||
| # to be used as arguments for docker build (so far). | ||||
|  | ||||
| ARG PYTORCH='2.9.0' | ||||
| ARG PYTORCH='2.8.0' | ||||
| # Example: `cu102`, `cu113`, etc. | ||||
| ARG CUDA='cu126' | ||||
|  | ||||
| # This needs to be compatible with the above `PYTORCH`. | ||||
| ARG TORCHCODEC='0.8.0' | ||||
|  | ||||
| ARG FLASH_ATTN='false' | ||||
|  | ||||
| RUN apt update | ||||
| RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs | ||||
| RUN git lfs install | ||||
| @ -26,44 +21,11 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip | ||||
| ARG REF=main | ||||
| RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev] | ||||
|  | ||||
| # 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future. | ||||
| # 2. For `torchcodec`, use `cpu` as we don't have `libnvcuvid.so` on the host runner. See https://github.com/meta-pytorch/torchcodec/issues/912 | ||||
| #    **Important**: We need to specify `torchcodec` version if the torch version is not the latest stable one. | ||||
| # 3. `set -e` means "exit immediately if any command fails". | ||||
| RUN set -e; \ | ||||
|     # Determine torch version | ||||
|     if [ ${#PYTORCH} -gt 0 ] && [ "$PYTORCH" != "pre" ]; then \ | ||||
|         VERSION="torch==${PYTORCH}.*"; \ | ||||
|         TORCHCODEC_VERSION="torchcodec==${TORCHCODEC}.*"; \ | ||||
|     else \ | ||||
|         VERSION="torch"; \ | ||||
|         TORCHCODEC_VERSION="torchcodec"; \ | ||||
|     fi; \ | ||||
|     \ | ||||
|     # Log the version being installed | ||||
|     echo "Installing torch version: $VERSION"; \ | ||||
|     \ | ||||
|     # Install PyTorch packages | ||||
|     if [ "$PYTORCH" != "pre" ]; then \ | ||||
|         python3 -m pip install --no-cache-dir -U \ | ||||
|             $VERSION \ | ||||
|             torchvision \ | ||||
|             torchaudio \ | ||||
|             --extra-index-url https://download.pytorch.org/whl/$CUDA; \ | ||||
|         # We need to specify the version if the torch version is not the latest stable one. | ||||
|         python3 -m pip install --no-cache-dir -U \ | ||||
|             $TORCHCODEC_VERSION --extra-index-url https://download.pytorch.org/whl/cpu; \ | ||||
|     else \ | ||||
|         python3 -m pip install --no-cache-dir -U --pre \ | ||||
|             torch \ | ||||
|             torchvision \ | ||||
|             torchaudio \ | ||||
|             --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA; \ | ||||
|         python3 -m pip install --no-cache-dir -U --pre \ | ||||
|             torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/cpu; \ | ||||
|     fi | ||||
| # 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`. | ||||
| #    Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions). | ||||
| # 3. For `torchcodec<0.8`: this is quickly added as torch 2.9.0 + torchcodec 0.8.0 fails on our CI env. Need to remove later once they work. | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio "torchcodec<0.8" --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir -U timm | ||||
|  | ||||
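The compact single-line `RUN` in the hunk above packs the torch-version selection into one command chain. An unrolled shell sketch of the same logic, offered as a reading aid rather than a proposed Dockerfile change (the editable install of `./transformers[dev,onnxruntime]` that precedes it is omitted):

```bash
# PYTORCH and CUDA come from the Dockerfile ARGs, e.g. PYTORCH=2.8.0, CUDA=cu126.
if [ -n "$PYTORCH" ] && [ "$PYTORCH" != "pre" ]; then
    VERSION="torch==${PYTORCH}.*"   # pin to the requested release series
else
    VERSION="torch"                 # no pin: latest stable or nightly
fi
echo "torch=$VERSION"

if [ "$PYTORCH" != "pre" ]; then
    # Stable channel for the given CUDA build; torchcodec is capped below 0.8 (see the comment above).
    python3 -m pip install --no-cache-dir -U "$VERSION" torchvision torchaudio "torchcodec<0.8" \
        --extra-index-url "https://download.pytorch.org/whl/$CUDA"
else
    # Nightly channel when PYTORCH=pre.
    python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec \
        --extra-index-url "https://download.pytorch.org/whl/nightly/$CUDA"
fi
```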
| @ -92,7 +54,7 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes | ||||
| RUN python3 -m pip install --no-cache-dir quanto | ||||
|  | ||||
| # After using A10 as CI runner, let's run FA2 tests | ||||
| RUN [ "$FLASH_ATTN" != "false" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch" | ||||
| RUN [ "$PYTORCH" != "pre" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch" | ||||
|  | ||||
| # TODO (ydshieh): check this again | ||||
| # `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests | ||||
|  | ||||
| @ -38,7 +38,7 @@ pip install transformers[dev] | ||||
| or for an editable install: | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[dev]" | ||||
| pip install -e .[dev] | ||||
| ``` | ||||
|  | ||||
| inside the Transformers repo. Since the number of optional dependencies of Transformers has grown a lot, it's possible you don't manage to get all of them. If the dev install fails, make sure to install PyTorch then do | ||||
| @ -50,7 +50,7 @@ pip install transformers[quality] | ||||
| or for an editable install: | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[quality]" | ||||
| pip install -e .[quality] | ||||
| ``` | ||||
|  | ||||
| ## Tests | ||||
|  | ||||
| @ -37,7 +37,7 @@ pip install transformers[dev] | ||||
| o una instalación editable: | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[dev]" | ||||
| pip install -e .[dev] | ||||
| ``` | ||||
|  | ||||
| del repositorio de Transformers. | ||||
|  | ||||
| @ -37,7 +37,7 @@ pip install transformers[dev] | ||||
| o un'installazione modificabile: | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[dev]" | ||||
| pip install -e .[dev] | ||||
| ``` | ||||
|  | ||||
| all'interno del repo Transformers. | ||||
|  | ||||
| @ -40,7 +40,7 @@ pip install transformers[dev] | ||||
|  | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[dev]" | ||||
| pip install -e .[dev] | ||||
| ``` | ||||
|  | ||||
| トランスフォーマーズのリポジトリ内で作業しています。トランスフォーマーズのオプションの依存関係の数が増えたため、すべてを取得できない可能性があります。開発用インストールが失敗した場合、作業しているディープラーニングフレームワーク(PyTorch、TensorFlow、および/またはFlax)をインストールし、次の手順を実行してください。 | ||||
| @ -53,7 +53,7 @@ pip install transformers[quality] | ||||
| または編集可能なインストールの場合: | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[quality]" | ||||
| pip install -e .[quality] | ||||
| ``` | ||||
|  | ||||
| ## Tests | ||||
|  | ||||
| @ -37,7 +37,7 @@ pip install transformers[dev] | ||||
| 또는 Transformers 저장소 내에 편집 가능한 설치가 필요합니다: | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[dev]" | ||||
| pip install -e .[dev] | ||||
| ``` | ||||
|  | ||||
| Transformers의 선택적 종속성 수가 많이 늘어났기 때문에 개발 설치를 실패할 수도 있습니다. 개발 설치가 실패하는 경우, 작업 중인 Deep Learning 프레임워크 (PyTorch, TensorFlow 및/또는 Flax)를 설치하고 다음 명령을 실행하세요. | ||||
| @ -49,7 +49,7 @@ pip install transformers[quality] | ||||
| 편집 가능한 설치의 경우는 다음 명령을 실행하세요. | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[quality]" | ||||
| pip install -e .[quality] | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
| @ -308,19 +308,11 @@ def main(): | ||||
|             api = HfApi() | ||||
|             repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id | ||||
|  | ||||
|             os.makedirs(args.output_dir, exist_ok=True) | ||||
|             gitignore_path = os.path.join(args.output_dir, ".gitignore") | ||||
|             content = "" | ||||
|             if os.path.exists(gitignore_path): | ||||
|                 with open(gitignore_path, "r") as f: | ||||
|                     content = f.read() | ||||
|             with open(gitignore_path, "a") as f: | ||||
|                 if content and not content.endswith("\n"): | ||||
|                     f.write("\n") | ||||
|                 if "step_*" not in content: | ||||
|                     f.write("step_*\n") | ||||
|                 if "epoch_*" not in content: | ||||
|                     f.write("epoch_*\n") | ||||
|             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: | ||||
|                 if "step_*" not in gitignore: | ||||
|                     gitignore.write("step_*\n") | ||||
|                 if "epoch_*" not in gitignore: | ||||
|                     gitignore.write("epoch_*\n") | ||||
|         elif args.output_dir is not None: | ||||
|             os.makedirs(args.output_dir, exist_ok=True) | ||||
|     accelerator.wait_for_everyone() | ||||
|  | ||||
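One side of the hunk above reads the existing `.gitignore` and appends only the patterns that are missing. A compact shell rendering of that idempotent append, kept as an aside rather than a suggested change to the script (the directory path is a placeholder for `args.output_dir`):

```bash
OUTPUT_DIR="./output"
mkdir -p "$OUTPUT_DIR"
touch "$OUTPUT_DIR/.gitignore"
for pattern in 'step_*' 'epoch_*'; do
    # Append the pattern only if an identical line is not already present.
    grep -qxF "$pattern" "$OUTPUT_DIR/.gitignore" || echo "$pattern" >> "$OUTPUT_DIR/.gitignore"
done
```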
| @ -33,9 +33,9 @@ You can open any page of the documentation as a notebook in Colab (there is a bu | ||||
| | [Quicktour of the library](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/quicktour.ipynb)  | A presentation of the various APIs in Transformers |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/quicktour.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/en/transformers_doc/quicktour.ipynb)| | | ||||
| | [Summary of the tasks](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/task_summary.ipynb)  | How to run the models of the Transformers library task by task |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/task_summary.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/task_summary.ipynb)| | | ||||
| | [Preprocessing data](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/preprocessing.ipynb)  | How to use a tokenizer to preprocess your data |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/preprocessing.ipynb) | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/preprocessing.ipynb)|| | ||||
| | [Fine-tuning a pretrained model](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)  | How to use the Trainer to fine-tune a pretrained model |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)| | ||||
| | [Summary of the tokenizers](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)  | The differences between the tokenizers algorithm |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb )| | ||||
| | [Multilingual models](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)  | How to use the multilingual models of the library |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)| | ||||
| | [Fine-tuning a pretrained model](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)  | How to use the Trainer to fine-tune a pretrained model |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)| | | ||||
| | [Summary of the tokenizers](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)  | The differences between the tokenizers algorithm |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)|[](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb )| | ||||
| | [Multilingual models](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)  | How to use the multilingual models of the library |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)|[](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)| | ||||
|  | ||||
| ### PyTorch Examples | ||||
|  | ||||
| @ -43,14 +43,14 @@ You can open any page of the documentation as a notebook in Colab (there is a bu | ||||
|  | ||||
| | Notebook     |      Description      |   |   |   | | ||||
| |:----------|:-------------|:-------------|:-------------|------:| | ||||
| | [Train your tokenizer](https://github.com/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)  | How to train and use your very own tokenizer  |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)| | ||||
| | [Train your language model](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)   | How to easily start using transformers  |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)| | ||||
| | [Train your tokenizer](https://github.com/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)  | How to train and use your very own tokenizer  |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)|[](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)| | ||||
| | [Train your language model](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)   | How to easily start using transformers  |[](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)|[](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)| | ||||
| | [How to fine-tune a model on text classification](https://github.com/huggingface/notebooks/blob/main/examples/text_classification.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on any GLUE task. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification.ipynb)| | | ||||
| | [How to fine-tune a model on language modeling](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on a causal or masked LM task. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| [](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| | ||||
| | [How to fine-tune a model on token classification](https://github.com/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on a token classification task (NER, PoS). | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| | ||||
| | [How to fine-tune a model on question answering](https://github.com/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on SQUAD. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| | ||||
| | [How to fine-tune a model on multiple choice](https://github.com/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on SWAG. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| | ||||
| | [How to fine-tune a model on translation](https://github.com/huggingface/notebooks/blob/main/examples/translation.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on WMT. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/translation.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/translation.ipynb)|[](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/translation.ipynb)| | ||||
| | [How to fine-tune a model on language modeling](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on a causal or masked LM task. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| [](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| | ||||
| | [How to fine-tune a model on token classification](https://github.com/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on a token classification task (NER, PoS). | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| | | ||||
| | [How to fine-tune a model on question answering](https://github.com/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on SQUAD. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| | | ||||
| | [How to fine-tune a model on multiple choice](https://github.com/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on SWAG. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| | | ||||
| | [How to fine-tune a model on translation](https://github.com/huggingface/notebooks/blob/main/examples/translation.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on WMT. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/translation.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/translation.ipynb)| | | ||||
| | [How to fine-tune a model on summarization](https://github.com/huggingface/notebooks/blob/main/examples/summarization.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on XSUM. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/summarization.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/summarization.ipynb)| | | ||||
| | [How to train a language model from scratch](https://github.com/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| Highlight all the steps to effectively train Transformer model on custom data | [](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| | | ||||
| | [How to generate text](https://github.com/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| How to use different decoding methods for language generation with transformers | [](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| | | ||||
| @ -58,16 +58,16 @@ You can open any page of the documentation as a notebook in Colab (there is a bu | ||||
|  | ||||
| #### Computer Vision[[pytorch-cv]] | ||||
|  | ||||
| | Notebook                                                                                                                                                                   | Description                                                                                                            |                                                                                                                                                                                                            |   |   | | ||||
| |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------|------:| | ||||
| | [How to fine-tune a model on image classification (Torchvision)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification.ipynb)                   | Show how to preprocess the data using Torchvision and fine-tune any pretrained Vision model on Image Classification    | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb)                 | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb)| [](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb)| | ||||
| | [How to fine-tune a model on image classification (Albumentations)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb) | Show how to preprocess the data using Albumentations and fine-tune any pretrained Vision model on Image Classification | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb)  | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb)| | | ||||
| | [How to fine-tune a model on image classification (Kornia)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)                 | Show how to preprocess the data using Kornia and fine-tune any pretrained Vision model on Image Classification         | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)          | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)| | | ||||
| | [How to perform zero-shot object detection with OWL-ViT](https://github.com/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)          | Show how to perform zero-shot object detection on images with text queries                                             | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)| | | ||||
| | [How to fine-tune an image captioning model](https://github.com/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb)                                      | Show how to fine-tune BLIP for image captioning on a custom dataset                                                    | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb)                | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb)| | | ||||
| | [How to build an image similarity system with Transformers](https://github.com/huggingface/notebooks/blob/main/examples/image_similarity.ipynb)                            | Show how to build an image similarity system                                                                           | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_similarity.ipynb)                     | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_similarity.ipynb)| | | ||||
| | [How to fine-tune a SegFormer model on semantic segmentation](https://github.com/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb)                     | Show how to preprocess the data and fine-tune a pretrained SegFormer model on Semantic Segmentation                    | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb)                | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb)| | | ||||
| | [How to fine-tune a VideoMAE model on video classification](https://github.com/huggingface/notebooks/blob/main/examples/video_classification.ipynb)          | Show how to preprocess the data and fine-tune a pretrained VideoMAE model on Video Classification                      | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/video_classification.ipynb)                | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/video_classification.ipynb)| | | ||||
| | Notebook                                                                                                                                                                   | Description                                                                                                            |                                                                                                                                                                                                            |   | | ||||
| |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------:| | ||||
| | [How to fine-tune a model on image classification (Torchvision)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification.ipynb)                   | Show how to preprocess the data using Torchvision and fine-tune any pretrained Vision model on Image Classification    | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb)                 | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb)| | ||||
| | [How to fine-tune a model on image classification (Albumentations)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb) | Show how to preprocess the data using Albumentations and fine-tune any pretrained Vision model on Image Classification | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb)  | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb)| | ||||
| | [How to fine-tune a model on image classification (Kornia)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)                 | Show how to preprocess the data using Kornia and fine-tune any pretrained Vision model on Image Classification         | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)          | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)| | ||||
| | [How to perform zero-shot object detection with OWL-ViT](https://github.com/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)          | Show how to perform zero-shot object detection on images with text queries                                             | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)| | ||||
| | [How to fine-tune an image captioning model](https://github.com/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb)                                      | Show how to fine-tune BLIP for image captioning on a custom dataset                                                    | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb)                | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb)| | ||||
| | [How to build an image similarity system with Transformers](https://github.com/huggingface/notebooks/blob/main/examples/image_similarity.ipynb)                            | Show how to build an image similarity system                                                                           | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_similarity.ipynb)                     | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_similarity.ipynb)| | ||||
| | [How to fine-tune a SegFormer model on semantic segmentation](https://github.com/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb)                     | Show how to preprocess the data and fine-tune a pretrained SegFormer model on Semantic Segmentation                    | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb)                | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb)| | ||||
| | [How to fine-tune a VideoMAE model on video classification](https://github.com/huggingface/notebooks/blob/main/examples/video_classification.ipynb)          | Show how to preprocess the data and fine-tune a pretrained VideoMAE model on Video Classification                      | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/video_classification.ipynb)                | [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/video_classification.ipynb)| | ||||
|  | ||||
| #### Audio[[pytorch-audio]] | ||||
|  | ||||
|  | ||||
							
								
								
									
setup.py (2 changes)

setup.py (2 changes)
							| @ -104,7 +104,7 @@ _deps = [ | ||||
|     "deepspeed>=0.9.3", | ||||
|     "diffusers", | ||||
|     "dill<0.3.5", | ||||
|     "evaluate>=0.4.6", | ||||
|     "evaluate>=0.2.0", | ||||
|     "faiss-cpu", | ||||
|     "fastapi", | ||||
|     "filelock", | ||||
|  | ||||
| @ -14,7 +14,7 @@ deps = { | ||||
|     "deepspeed": "deepspeed>=0.9.3", | ||||
|     "diffusers": "diffusers", | ||||
|     "dill": "dill<0.3.5", | ||||
|     "evaluate": "evaluate>=0.4.6", | ||||
|     "evaluate": "evaluate>=0.2.0", | ||||
|     "faiss-cpu": "faiss-cpu", | ||||
|     "fastapi": "fastapi", | ||||
|     "filelock": "filelock", | ||||
|  | ||||
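Context sketch (not part of the diffs above; `require_version` is the existing helper in `transformers.utils.versions`): the pin strings collected in `_deps` / `deps` are what the runtime dependency check feeds into the version checker, so relaxing the `evaluate` pin changes which installed versions pass this call.

from transformers.utils.versions import require_version

# The relaxed pin from the hunks above; raises an error if the installed
# `evaluate` does not satisfy the requirement.
require_version("evaluate>=0.2.0", "To fix: pip install -U evaluate")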
| @ -314,14 +314,13 @@ def _load_state_dict_into_zero3_model(model_to_load, state_dict): | ||||
|         args = (state_dict, prefix, local_metadata, True, [], [], error_msgs) | ||||
|         # Parameters of module and children will start with prefix. We can exit early if there are none in this | ||||
|         # state_dict | ||||
|         if is_deepspeed_zero3_enabled(): | ||||
|         if is_deepspeed_zero3_enabled() and len([key for key in state_dict if key.startswith(prefix)]) > 0: | ||||
|             import deepspeed | ||||
|  | ||||
|             # In sharded models, each shard has only part of the full state_dict, so only gather | ||||
|             # parameters that are in the current state_dict. | ||||
|             named_parameters = dict(module.named_parameters(prefix=prefix[:-1], recurse=False)) | ||||
|             params_to_gather = [named_parameters[k] for k in named_parameters if k in state_dict] | ||||
|  | ||||
|             params_to_gather = [named_parameters[k] for k in state_dict if k in named_parameters] | ||||
|             if len(params_to_gather) > 0: | ||||
|                 # because zero3 puts placeholders in model params, this context | ||||
|                 # manager gathers (unpartitions) the params of the current layer, then loads from | ||||
|  | ||||
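Standalone sketch of the gather-then-load step that the truncated comment above refers to, assuming DeepSpeed is installed; the helper name `load_into_zero3_module` is hypothetical, but `deepspeed.zero.GatheredParameters` is the real context manager used for ZeRO-3 partitioned parameters.

import deepspeed
import torch.nn as nn


def load_into_zero3_module(module: nn.Module, state_dict: dict, prefix: str) -> None:
    # Only parameters that are both owned by this module and present in the current
    # shard's state_dict are gathered, mirroring the filtering in the hunk above.
    # `prefix` is assumed to end with a trailing dot, as in the caller.
    named_parameters = dict(module.named_parameters(prefix=prefix[:-1], recurse=False))
    params_to_gather = [named_parameters[k] for k in state_dict if k in named_parameters]
    if params_to_gather:
        # ZeRO-3 leaves empty placeholders in module params; GatheredParameters
        # temporarily unpartitions them on rank 0 so the load sees full tensors.
        with deepspeed.zero.GatheredParameters(params_to_gather, modifier_rank=0):
            module._load_from_state_dict(state_dict, prefix, {}, True, [], [], [])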
| @ -628,7 +628,7 @@ def maybe_load_adapters( | ||||
|     **adapter_kwargs, | ||||
| ): | ||||
|     if pretrained_model_name_or_path is None or not is_peft_available(): | ||||
|         return None, pretrained_model_name_or_path, adapter_kwargs | ||||
|         return None, pretrained_model_name_or_path | ||||
|  | ||||
|     token = download_kwargs.get("token") | ||||
|  | ||||
| @ -651,15 +651,13 @@ def maybe_load_adapters( | ||||
|  | ||||
|     _adapter_model_path = adapter_kwargs.pop("_adapter_model_path", None) | ||||
|  | ||||
|     token_from_adapter_kwargs = adapter_kwargs.pop("token", None) | ||||
|  | ||||
|     if _adapter_model_path is None: | ||||
|         _adapter_model_path = find_adapter_config_file( | ||||
|             pretrained_model_name_or_path, | ||||
|             cache_dir=download_kwargs.get("cache_dir"), | ||||
|             force_download=bool(download_kwargs.get("force_download", False)), | ||||
|             proxies=download_kwargs.get("proxies"), | ||||
|             token=token or token_from_adapter_kwargs, | ||||
|             token=token, | ||||
|             revision=download_kwargs.get("revision"), | ||||
|             local_files_only=bool(download_kwargs.get("local_files_only", False)), | ||||
|             subfolder=download_kwargs.get("subfolder", ""), | ||||
| @ -672,4 +670,4 @@ def maybe_load_adapters( | ||||
|             _adapter_model_path = pretrained_model_name_or_path | ||||
|             pretrained_model_name_or_path = json.load(f)["base_model_name_or_path"] | ||||
|  | ||||
|     return _adapter_model_path, pretrained_model_name_or_path, adapter_kwargs | ||||
|     return _adapter_model_path, pretrained_model_name_or_path | ||||
|  | ||||
| @ -4353,7 +4353,7 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH | ||||
|         if adapter_kwargs is None: | ||||
|             adapter_kwargs = {} | ||||
|  | ||||
|         _adapter_model_path, pretrained_model_name_or_path, adapter_kwargs = maybe_load_adapters( | ||||
|         _adapter_model_path, pretrained_model_name_or_path = maybe_load_adapters( | ||||
|             pretrained_model_name_or_path, | ||||
|             download_kwargs_with_commit, | ||||
|             **adapter_kwargs, | ||||
|  | ||||
| @ -538,12 +538,12 @@ class BartEncoder(BartPreTrainedModel): | ||||
|         self.max_source_positions = config.max_position_embeddings | ||||
|         embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0 | ||||
|  | ||||
|         self.embed_tokens = BartScaledWordEmbedding( | ||||
|             config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale | ||||
|         ) | ||||
|  | ||||
|         if embed_tokens is not None: | ||||
|             self.embed_tokens = embed_tokens | ||||
|         else: | ||||
|             self.embed_tokens = BartScaledWordEmbedding( | ||||
|                 config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale | ||||
|             ) | ||||
|             self.embed_tokens.weight = embed_tokens.weight | ||||
|  | ||||
|         self.embed_positions = BartLearnedPositionalEmbedding( | ||||
|             config.max_position_embeddings, | ||||
| @ -682,12 +682,12 @@ class BartDecoder(BartPreTrainedModel): | ||||
|         self.max_target_positions = config.max_position_embeddings | ||||
|         embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0 | ||||
|  | ||||
|         self.embed_tokens = BartScaledWordEmbedding( | ||||
|             config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale | ||||
|         ) | ||||
|  | ||||
|         if embed_tokens is not None: | ||||
|             self.embed_tokens = embed_tokens | ||||
|         else: | ||||
|             self.embed_tokens = BartScaledWordEmbedding( | ||||
|                 config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale | ||||
|             ) | ||||
|             self.embed_tokens.weight = embed_tokens.weight | ||||
|  | ||||
|         self.embed_positions = BartLearnedPositionalEmbedding( | ||||
|             config.max_position_embeddings, | ||||
|  | ||||
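Illustrative sketch of the two embedding-sharing strategies visible in the Bart hunks above, using a plain `nn.Embedding` in place of `BartScaledWordEmbedding`:

import torch.nn as nn

vocab_size, embed_dim, padding_idx = 50265, 768, 1
shared = nn.Embedding(vocab_size, embed_dim, padding_idx)  # model-level shared embedding

# Strategy 1: adopt the passed-in module directly.
embed_tokens_a = shared

# Strategy 2: keep a module of our own but tie its weight to the shared Parameter.
embed_tokens_b = nn.Embedding(vocab_size, embed_dim, padding_idx)
embed_tokens_b.weight = shared.weight

assert embed_tokens_a.weight is shared.weight and embed_tokens_b.weight is shared.weight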
| @ -22,7 +22,7 @@ import torch | ||||
| from torch import nn | ||||
|  | ||||
| from ...activations import ACT2FN | ||||
| from ...masking_utils import create_causal_mask | ||||
| from ...modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask | ||||
| from ...modeling_layers import GradientCheckpointingLayer | ||||
| from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput | ||||
| from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel | ||||
| @ -310,6 +310,7 @@ class CLIPAttention(nn.Module): | ||||
|         self, | ||||
|         hidden_states: torch.Tensor, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         causal_attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: | ||||
|         """Input shape: Batch x Time x Channel""" | ||||
| @ -323,6 +324,15 @@ class CLIPAttention(nn.Module): | ||||
|         queries = queries.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2) | ||||
|         keys = keys.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2) | ||||
|         values = values.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2) | ||||
|         # CLIP text model uses both `causal_attention_mask` and `attention_mask` | ||||
|         # in case FA2 kernel is called, `is_causal` should be inferred from `causal_attention_mask` | ||||
|         if self.config._attn_implementation == "flash_attention_2": | ||||
|             self.is_causal = causal_attention_mask is not None | ||||
|         else: | ||||
|             if attention_mask is not None and causal_attention_mask is not None: | ||||
|                 attention_mask = attention_mask + causal_attention_mask | ||||
|             elif causal_attention_mask is not None: | ||||
|                 attention_mask = causal_attention_mask | ||||
|  | ||||
|         attention_interface: Callable = eager_attention_forward | ||||
|         if self.config._attn_implementation != "eager": | ||||
| @ -334,12 +344,13 @@ class CLIPAttention(nn.Module): | ||||
|             keys, | ||||
|             values, | ||||
|             attention_mask, | ||||
|             is_causal=self.is_causal, | ||||
|             scaling=self.scale, | ||||
|             dropout=0.0 if not self.training else self.dropout, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
|         attn_output = attn_output.reshape(batch_size, seq_length, -1).contiguous() | ||||
|         attn_output = attn_output.reshape(batch_size, seq_length, embed_dim).contiguous() | ||||
|         attn_output = self.out_proj(attn_output) | ||||
|  | ||||
|         return attn_output, attn_weights | ||||
| @ -373,14 +384,16 @@ class CLIPEncoderLayer(GradientCheckpointingLayer): | ||||
|         self, | ||||
|         hidden_states: torch.Tensor, | ||||
|         attention_mask: torch.Tensor, | ||||
|         causal_attention_mask: torch.Tensor, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> torch.FloatTensor: | ||||
|         residual = hidden_states | ||||
|  | ||||
|         hidden_states = self.layer_norm1(hidden_states) | ||||
|         hidden_states, _ = self.self_attn( | ||||
|         hidden_states, attn_weights = self.self_attn( | ||||
|             hidden_states=hidden_states, | ||||
|             attention_mask=attention_mask, | ||||
|             causal_attention_mask=causal_attention_mask, | ||||
|             **kwargs, | ||||
|         ) | ||||
|         hidden_states = residual + hidden_states | ||||
| @ -484,6 +497,7 @@ class CLIPEncoder(nn.Module): | ||||
|         self, | ||||
|         inputs_embeds, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         causal_attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> BaseModelOutput: | ||||
|         r""" | ||||
| @ -498,6 +512,13 @@ class CLIPEncoder(nn.Module): | ||||
|                 - 1 for tokens that are **not masked**, | ||||
|                 - 0 for tokens that are **masked**. | ||||
|  | ||||
|                 [What are attention masks?](../glossary#attention-mask) | ||||
|             causal_attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): | ||||
|                 Causal mask for the text model. Mask values selected in `[0, 1]`: | ||||
|  | ||||
|                 - 1 for tokens that are **not masked**, | ||||
|                 - 0 for tokens that are **masked**. | ||||
|  | ||||
|                 [What are attention masks?](../glossary#attention-mask) | ||||
|         """ | ||||
|         hidden_states = inputs_embeds | ||||
| @ -505,6 +526,7 @@ class CLIPEncoder(nn.Module): | ||||
|             hidden_states = encoder_layer( | ||||
|                 hidden_states, | ||||
|                 attention_mask, | ||||
|                 causal_attention_mask, | ||||
|                 **kwargs, | ||||
|             ) | ||||
|  | ||||
| @ -541,19 +563,17 @@ class CLIPTextTransformer(nn.Module): | ||||
|  | ||||
|         hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids) | ||||
|  | ||||
|         attention_mask = create_causal_mask( | ||||
|             config=self.config, | ||||
|             input_embeds=hidden_states, | ||||
|             attention_mask=attention_mask, | ||||
|             cache_position=torch.arange(hidden_states.shape[1], device=hidden_states.device), | ||||
|             past_key_values=None, | ||||
|         causal_attention_mask = _create_4d_causal_attention_mask( | ||||
|             input_shape, hidden_states.dtype, device=hidden_states.device | ||||
|         ) | ||||
|  | ||||
|         kwargs.pop("is_causal", None) | ||||
|         if attention_mask is not None and self.config._attn_implementation != "flash_attention_2": | ||||
|             attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype) | ||||
|  | ||||
|         encoder_outputs: BaseModelOutput = self.encoder( | ||||
|             inputs_embeds=hidden_states, | ||||
|             attention_mask=attention_mask, | ||||
|             is_causal=True, | ||||
|             causal_attention_mask=causal_attention_mask, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
| @ -598,6 +618,7 @@ class CLIPTextModel(CLIPPreTrainedModel): | ||||
|     input_modalities = "text" | ||||
|  | ||||
|     _no_split_modules = ["CLIPTextEmbeddings", "CLIPEncoderLayer"] | ||||
|     _supports_flash_attn = False  # mask creation only accounts for sdpa/eager | ||||
|  | ||||
|     def __init__(self, config: CLIPTextConfig): | ||||
|         super().__init__(config) | ||||
| @ -611,7 +632,8 @@ class CLIPTextModel(CLIPPreTrainedModel): | ||||
|     def set_input_embeddings(self, value): | ||||
|         self.text_model.embeddings.token_embedding = value | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @check_model_inputs() | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
| @ -704,6 +726,7 @@ class CLIPVisionModel(CLIPPreTrainedModel): | ||||
|         return self.vision_model.embeddings.patch_embedding | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
| @ -743,6 +766,7 @@ class CLIPVisionModel(CLIPPreTrainedModel): | ||||
| class CLIPModel(CLIPPreTrainedModel): | ||||
|     config: CLIPConfig | ||||
|     _no_split_modules = ["CLIPTextEmbeddings", "CLIPEncoderLayer", "CLIPVisionEmbeddings"] | ||||
|     _supports_flash_attn = False  # mask creation only accounts for sdpa/eager | ||||
|  | ||||
|     def __init__(self, config: CLIPConfig): | ||||
|         super().__init__(config) | ||||
| @ -942,6 +966,7 @@ class CLIPTextModelWithProjection(CLIPPreTrainedModel): | ||||
|     config: CLIPTextConfig | ||||
|     input_modalities = "text" | ||||
|  | ||||
|     _supports_flash_attn = False | ||||
|     _no_split_modules = ["CLIPTextEmbeddings", "CLIPEncoderLayer"] | ||||
|  | ||||
|     def __init__(self, config: CLIPTextConfig): | ||||
| @ -961,7 +986,8 @@ class CLIPTextModelWithProjection(CLIPPreTrainedModel): | ||||
|     def set_input_embeddings(self, value): | ||||
|         self.text_model.embeddings.token_embedding = value | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @check_model_inputs() | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
| @ -1023,6 +1049,7 @@ class CLIPVisionModelWithProjection(CLIPPreTrainedModel): | ||||
|         return self.vision_model.embeddings.patch_embedding | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
| @ -1090,7 +1117,8 @@ class CLIPForImageClassification(CLIPPreTrainedModel): | ||||
|         # Initialize weights and apply final processing | ||||
|         self.post_init() | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @check_model_inputs() | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|  | ||||
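Standalone sketch (illustrative shapes only) of the mask handling in the CLIP hunks above: for the eager/SDPA paths the padding mask and the causal mask are both additive (0 for visible positions, a large negative value for masked ones), so they are summed and broadcast, while the flash-attention path skips the 4-D masks and signals causality through `is_causal` instead.

import torch

batch, seq = 2, 4
neg = torch.finfo(torch.float32).min

causal = torch.triu(torch.full((seq, seq), neg), diagonal=1)[None, None, :, :]  # (1, 1, S, S)
padding = torch.zeros(batch, 1, 1, seq)
padding[0, :, :, -1] = neg          # pretend the last token of sample 0 is padding

combined = padding + causal         # broadcasts to (batch, 1, S, S)
print(combined.shape)               # torch.Size([2, 1, 4, 4])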
| @ -1392,7 +1392,7 @@ class Emu3Model(Emu3PreTrainedModel): | ||||
|         image_features = torch.split(image_features, split_sizes) | ||||
|         return image_features | ||||
|  | ||||
|     @torch.no_grad() | ||||
|     @torch.no_grad | ||||
|     def decode_image_tokens(self, image_tokens: torch.LongTensor, height: int, width: int): | ||||
|         """ | ||||
|         Decodes generated image tokens from language model to continuous pixel values | ||||
|  | ||||
| @ -946,7 +946,7 @@ class Emu3Model(Emu3PreTrainedModel): | ||||
|         image_features = torch.split(image_features, split_sizes) | ||||
|         return image_features | ||||
|  | ||||
|     @torch.no_grad() | ||||
|     @torch.no_grad | ||||
|     def decode_image_tokens(self, image_tokens: torch.LongTensor, height: int, width: int): | ||||
|         """ | ||||
|         Decodes generated image tokens from language model to continuous pixel values | ||||
|  | ||||
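Behavioral note on the `@torch.no_grad()` → `@torch.no_grad` change above: in recent PyTorch releases `torch.no_grad` can be applied as a decorator with or without parentheses, so the two spellings behave the same; a minimal check:

import torch


@torch.no_grad
def double_a(x):
    return x * 2


@torch.no_grad()
def double_b(x):
    return x * 2


t = torch.ones(2, requires_grad=True)
print(double_a(t).requires_grad, double_b(t).requires_grad)  # False False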
| @ -473,9 +473,6 @@ def convert_florence2_checkpoint(hf_model_id, pytorch_dump_folder, output_hub_pa | ||||
|  | ||||
|     vision_config = convert_config(hf_config.vision_config.__dict__) | ||||
|     text_config = hf_config.text_config.__dict__ | ||||
|     if text_config.get("model_type") == "florence2_language": | ||||
|         text_config["model_type"] = "bart" | ||||
|  | ||||
|     config = Florence2Config( | ||||
|         text_config=text_config, | ||||
|         vision_config=vision_config, | ||||
|  | ||||
| @ -156,7 +156,7 @@ class Gemma3TextConfig(PreTrainedConfig): | ||||
|         layer_types: Optional[list[str]] = None, | ||||
|         final_logit_softcapping: Optional[float] = None, | ||||
|         attn_logit_softcapping: Optional[float] = None, | ||||
|         rope_parameters: Optional[RopeParameters | dict[str, RopeParameters]] = None, | ||||
|         rope_parameters: Optional[RopeParameters | dict[RopeParameters]] = None, | ||||
|         use_bidirectional_attention: Optional[bool] = False, | ||||
|         **kwargs, | ||||
|     ): | ||||
| @ -186,16 +186,10 @@ class Gemma3TextConfig(PreTrainedConfig): | ||||
|         self.final_logit_softcapping = final_logit_softcapping | ||||
|         self.attn_logit_softcapping = attn_logit_softcapping | ||||
|         self.layer_types = layer_types | ||||
|  | ||||
|         # Try to set `rope_scaling` if available, otherwise use `rope_parameters` | ||||
|         if (rope_scaling := kwargs.pop("rope_scaling", None)) is not None: | ||||
|             if rope_parameters is None: | ||||
|                 rope_parameters = {"sliding_attention": {"rope_type": "default"}, "full_attention": rope_scaling} | ||||
|             elif "full_attention" in rope_parameters: | ||||
|                 rope_parameters["full_attention"].update(rope_scaling) | ||||
|             else: | ||||
|                 rope_parameters.update(rope_scaling) | ||||
|  | ||||
|         rope_scaling = kwargs.pop("rope_scaling", None) | ||||
|         if rope_scaling is not None: | ||||
|             rope_parameters = {"sliding_attention": {"rope_type": "default"}, "full_attention": rope_scaling} | ||||
|         self.rope_parameters = rope_parameters | ||||
|         self.use_bidirectional_attention = use_bidirectional_attention | ||||
|         if use_bidirectional_attention: | ||||
|  | ||||
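Minimal sketch (hypothetical values) of the backward-compatibility mapping handled in the hunk above: a legacy flat `rope_scaling` dict is folded into the per-attention-type `rope_parameters` layout, with sliding-window layers falling back to default RoPE.

legacy_rope_scaling = {"rope_type": "linear", "factor": 8.0}

rope_parameters = {
    "sliding_attention": {"rope_type": "default"},
    "full_attention": legacy_rope_scaling,
}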
| @ -191,10 +191,7 @@ _VARIANTS = { | ||||
|             num_hidden_layers=34, | ||||
|             num_key_value_heads=4, | ||||
|             sliding_window=1024, | ||||
|             rope_parameters={ | ||||
|                 "full_attention": {"rope_type": "linear", "factor": 8.0}, | ||||
|                 "sliding_attention": {"rope_type": "default"}, | ||||
|             }, | ||||
|             rope_parameters={"rope_type": "linear", "factor": 8.0},  # used for global RoPE only | ||||
|             rope_theta=1_000_000, | ||||
|             rope_local_base_freq=10_000, | ||||
|             attn_logit_softcapping=None, | ||||
| @ -212,10 +209,7 @@ _VARIANTS = { | ||||
|             num_hidden_layers=48, | ||||
|             num_key_value_heads=8, | ||||
|             sliding_window=1024, | ||||
|             rope_parameters={ | ||||
|                 "full_attention": {"rope_type": "linear", "factor": 8.0}, | ||||
|                 "sliding_attention": {"rope_type": "default"}, | ||||
|             }, | ||||
|             rope_parameters={"rope_type": "linear", "factor": 8.0},  # used for global RoPE only | ||||
|             rope_theta=1_000_000, | ||||
|             rope_local_base_freq=10_000, | ||||
|             attn_logit_softcapping=None, | ||||
| @ -233,10 +227,7 @@ _VARIANTS = { | ||||
|             num_key_value_heads=16, | ||||
|             head_dim=128, | ||||
|             sliding_window=1024, | ||||
|             rope_parameters={ | ||||
|                 "full_attention": {"rope_type": "linear", "factor": 8.0}, | ||||
|                 "sliding_attention": {"rope_type": "default"}, | ||||
|             }, | ||||
|             rope_parameters={"rope_type": "linear", "factor": 8.0},  # used for global RoPE only | ||||
|             rope_theta=1_000_000, | ||||
|             rope_local_base_freq=10_000, | ||||
|             attn_logit_softcapping=None, | ||||
|  | ||||
| @ -171,7 +171,7 @@ class Gemma3TextConfig(Gemma2Config, PreTrainedConfig): | ||||
|         layer_types: Optional[list[str]] = None, | ||||
|         final_logit_softcapping: Optional[float] = None, | ||||
|         attn_logit_softcapping: Optional[float] = None, | ||||
|         rope_parameters: Optional[RopeParameters | dict[str, RopeParameters]] = None, | ||||
|         rope_parameters: Optional[RopeParameters | dict[RopeParameters]] = None, | ||||
|         use_bidirectional_attention: Optional[bool] = False, | ||||
|         **kwargs, | ||||
|     ): | ||||
| @ -201,16 +201,10 @@ class Gemma3TextConfig(Gemma2Config, PreTrainedConfig): | ||||
|         self.final_logit_softcapping = final_logit_softcapping | ||||
|         self.attn_logit_softcapping = attn_logit_softcapping | ||||
|         self.layer_types = layer_types | ||||
|  | ||||
|         # Try to set `rope_scaling` if available, otherwise use `rope_parameters` | ||||
|         if (rope_scaling := kwargs.pop("rope_scaling", None)) is not None: | ||||
|             if rope_parameters is None: | ||||
|                 rope_parameters = {"sliding_attention": {"rope_type": "default"}, "full_attention": rope_scaling} | ||||
|             elif "full_attention" in rope_parameters: | ||||
|                 rope_parameters["full_attention"].update(rope_scaling) | ||||
|             else: | ||||
|                 rope_parameters.update(rope_scaling) | ||||
|  | ||||
|         rope_scaling = kwargs.pop("rope_scaling", None) | ||||
|         if rope_scaling is not None: | ||||
|             rope_parameters = {"sliding_attention": {"rope_type": "default"}, "full_attention": rope_scaling} | ||||
|         self.rope_parameters = rope_parameters | ||||
|         self.use_bidirectional_attention = use_bidirectional_attention | ||||
|         if use_bidirectional_attention: | ||||
|  | ||||
| @ -1283,7 +1283,7 @@ class JanusForConditionalGeneration(JanusPreTrainedModel, GenerationMixin): | ||||
|         decoded_image = decoded_image.permute(0, 2, 3, 1) | ||||
|         return decoded_image | ||||
|  | ||||
|     @torch.no_grad() | ||||
|     @torch.no_grad | ||||
|     def generate( | ||||
|         self, | ||||
|         inputs: Optional[torch.Tensor] = None, | ||||
|  | ||||
| @ -1099,7 +1099,7 @@ class JanusForConditionalGeneration(JanusPreTrainedModel, GenerationMixin): | ||||
|         decoded_image = decoded_image.permute(0, 2, 3, 1) | ||||
|         return decoded_image | ||||
|  | ||||
|     @torch.no_grad() | ||||
|     @torch.no_grad | ||||
|     def generate( | ||||
|         self, | ||||
|         inputs: Optional[torch.Tensor] = None, | ||||
|  | ||||
| @ -209,8 +209,8 @@ class Lfm2VlImageProcessorFast(BaseImageProcessorFast): | ||||
|     do_normalize = True | ||||
|     do_pad = True | ||||
|     return_row_col_info = False | ||||
|     image_mean = IMAGENET_STANDARD_MEAN | ||||
|     image_std = IMAGENET_STANDARD_STD | ||||
|     image_mean = IMAGENET_STANDARD_STD | ||||
|     image_std = IMAGENET_STANDARD_MEAN | ||||
|     valid_kwargs = Lfm2VlImageProcessorKwargs | ||||
|     model_input_names = ["pixel_values", "pixel_attention_mask", "spatial_shapes"] | ||||
|  | ||||
|  | ||||
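Context note on the swapped assignments above, under the assumption that the usual `transformers.image_utils` constants are in play: the "standard" ImageNet mean and std are both 0.5 per channel, so the swap does not change the normalization numerically.

from transformers.image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD

print(IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD)    # [0.5, 0.5, 0.5] [0.5, 0.5, 0.5]
print(IMAGENET_STANDARD_MEAN == IMAGENET_STANDARD_STD)  # True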
| @ -12,7 +12,7 @@ import torch | ||||
| from torch import nn | ||||
|  | ||||
| from ...activations import ACT2FN | ||||
| from ...masking_utils import create_causal_mask | ||||
| from ...modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask | ||||
| from ...modeling_layers import GradientCheckpointingLayer | ||||
| from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput | ||||
| from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel | ||||
| @ -200,6 +200,7 @@ class MetaClip2Attention(nn.Module): | ||||
|         self, | ||||
|         hidden_states: torch.Tensor, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         causal_attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: | ||||
|         """Input shape: Batch x Time x Channel""" | ||||
| @ -213,6 +214,15 @@ class MetaClip2Attention(nn.Module): | ||||
|         queries = queries.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2) | ||||
|         keys = keys.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2) | ||||
|         values = values.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2) | ||||
|         # METACLIP_2 text model uses both `causal_attention_mask` and `attention_mask` | ||||
|         # in case FA2 kernel is called, `is_causal` should be inferred from `causal_attention_mask` | ||||
|         if self.config._attn_implementation == "flash_attention_2": | ||||
|             self.is_causal = causal_attention_mask is not None | ||||
|         else: | ||||
|             if attention_mask is not None and causal_attention_mask is not None: | ||||
|                 attention_mask = attention_mask + causal_attention_mask | ||||
|             elif causal_attention_mask is not None: | ||||
|                 attention_mask = causal_attention_mask | ||||
|  | ||||
|         attention_interface: Callable = eager_attention_forward | ||||
|         if self.config._attn_implementation != "eager": | ||||
| @ -224,12 +234,13 @@ class MetaClip2Attention(nn.Module): | ||||
|             keys, | ||||
|             values, | ||||
|             attention_mask, | ||||
|             is_causal=self.is_causal, | ||||
|             scaling=self.scale, | ||||
|             dropout=0.0 if not self.training else self.dropout, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
|         attn_output = attn_output.reshape(batch_size, seq_length, -1).contiguous() | ||||
|         attn_output = attn_output.reshape(batch_size, seq_length, embed_dim).contiguous() | ||||
|         attn_output = self.out_proj(attn_output) | ||||
|  | ||||
|         return attn_output, attn_weights | ||||
| @ -263,14 +274,16 @@ class MetaClip2EncoderLayer(GradientCheckpointingLayer): | ||||
|         self, | ||||
|         hidden_states: torch.Tensor, | ||||
|         attention_mask: torch.Tensor, | ||||
|         causal_attention_mask: torch.Tensor, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> torch.FloatTensor: | ||||
|         residual = hidden_states | ||||
|  | ||||
|         hidden_states = self.layer_norm1(hidden_states) | ||||
|         hidden_states, _ = self.self_attn( | ||||
|         hidden_states, attn_weights = self.self_attn( | ||||
|             hidden_states=hidden_states, | ||||
|             attention_mask=attention_mask, | ||||
|             causal_attention_mask=causal_attention_mask, | ||||
|             **kwargs, | ||||
|         ) | ||||
|         hidden_states = residual + hidden_states | ||||
| @ -374,6 +387,7 @@ class MetaClip2Encoder(nn.Module): | ||||
|         self, | ||||
|         inputs_embeds, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         causal_attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> BaseModelOutput: | ||||
|         r""" | ||||
| @ -388,6 +402,13 @@ class MetaClip2Encoder(nn.Module): | ||||
|                 - 1 for tokens that are **not masked**, | ||||
|                 - 0 for tokens that are **masked**. | ||||
|  | ||||
|                 [What are attention masks?](../glossary#attention-mask) | ||||
|             causal_attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): | ||||
|                 Causal mask for the text model. Mask values selected in `[0, 1]`: | ||||
|  | ||||
|                 - 1 for tokens that are **not masked**, | ||||
|                 - 0 for tokens that are **masked**. | ||||
|  | ||||
|                 [What are attention masks?](../glossary#attention-mask) | ||||
|         """ | ||||
|         hidden_states = inputs_embeds | ||||
| @ -395,6 +416,7 @@ class MetaClip2Encoder(nn.Module): | ||||
|             hidden_states = encoder_layer( | ||||
|                 hidden_states, | ||||
|                 attention_mask, | ||||
|                 causal_attention_mask, | ||||
|                 **kwargs, | ||||
|             ) | ||||
|  | ||||
| @ -415,12 +437,14 @@ class MetaClip2TextTransformer(nn.Module): | ||||
|         # For `pooled_output` computation | ||||
|         self.eos_token_id = config.eos_token_id | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         input_ids, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         position_ids: Optional[torch.Tensor] = None, | ||||
|         use_cache: Optional[bool] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> BaseModelOutputWithPooling: | ||||
|         input_shape = input_ids.size() | ||||
| @ -428,19 +452,21 @@ class MetaClip2TextTransformer(nn.Module): | ||||
|  | ||||
|         hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids) | ||||
|  | ||||
|         attention_mask = create_causal_mask( | ||||
|             config=self.config, | ||||
|             input_embeds=hidden_states, | ||||
|             attention_mask=attention_mask, | ||||
|             cache_position=torch.arange(hidden_states.shape[1], device=hidden_states.device), | ||||
|             past_key_values=None, | ||||
|         # CLIP's text model uses causal mask, prepare it here. | ||||
|         # https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324 | ||||
|         causal_attention_mask = _create_4d_causal_attention_mask( | ||||
|             input_shape, hidden_states.dtype, device=hidden_states.device | ||||
|         ) | ||||
|  | ||||
|         kwargs.pop("is_causal", None) | ||||
|         # expand attention_mask | ||||
|         if attention_mask is not None and self.config._attn_implementation != "flash_attention_2": | ||||
|             # [batch_size, seq_len] -> [batch_size, 1, tgt_seq_len, src_seq_len] | ||||
|             attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype) | ||||
|  | ||||
|         encoder_outputs: BaseModelOutput = self.encoder( | ||||
|             inputs_embeds=hidden_states, | ||||
|             attention_mask=attention_mask, | ||||
|             is_causal=True, | ||||
|             causal_attention_mask=causal_attention_mask, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
| @ -501,6 +527,7 @@ class MetaClip2TextModel(MetaClip2PreTrainedModel): | ||||
|     input_modalities = "text" | ||||
|  | ||||
|     _no_split_modules = ["MetaClip2TextEmbeddings", "MetaClip2EncoderLayer"] | ||||
|     _supports_flash_attn = False  # mask creation only accounts for sdpa/eager | ||||
|  | ||||
|     def __init__(self, config: MetaClip2TextConfig): | ||||
|         super().__init__(config) | ||||
| @ -514,13 +541,16 @@ class MetaClip2TextModel(MetaClip2PreTrainedModel): | ||||
|     def set_input_embeddings(self, value): | ||||
|         self.text_model.embeddings.token_embedding = value | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @check_model_inputs() | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         input_ids: Optional[torch.Tensor] = None, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         position_ids: Optional[torch.Tensor] = None, | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> BaseModelOutputWithPooling: | ||||
|         r""" | ||||
| @ -600,6 +630,7 @@ class MetaClip2TextModelWithProjection(MetaClip2PreTrainedModel): | ||||
|     config: MetaClip2TextConfig | ||||
|     input_modalities = "text" | ||||
|  | ||||
|     _supports_flash_attn = False | ||||
|     _no_split_modules = ["MetaClip2TextEmbeddings", "MetaClip2EncoderLayer"] | ||||
|  | ||||
|     def __init__(self, config: MetaClip2TextConfig): | ||||
| @ -619,13 +650,16 @@ class MetaClip2TextModelWithProjection(MetaClip2PreTrainedModel): | ||||
|     def set_input_embeddings(self, value): | ||||
|         self.text_model.embeddings.token_embedding = value | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @check_model_inputs() | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         input_ids: Optional[torch.Tensor] = None, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         position_ids: Optional[torch.Tensor] = None, | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> MetaClip2TextModelOutput: | ||||
|         r""" | ||||
| @ -758,6 +792,7 @@ class MetaClip2Model(MetaClip2PreTrainedModel): | ||||
|  | ||||
|     config: MetaClip2Config | ||||
|     _no_split_modules = ["MetaClip2TextEmbeddings", "MetaClip2EncoderLayer", "MetaClip2VisionEmbeddings"] | ||||
|     _supports_flash_attn = False  # mask creation only accounts for sdpa/eager | ||||
|  | ||||
|     def __init__(self, config: MetaClip2Config): | ||||
|         super().__init__(config) | ||||
| @ -1043,7 +1078,7 @@ class MetaClip2VisionModel(MetaClip2PreTrainedModel): | ||||
|         return self.vision_model.embeddings.patch_embedding | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @auto_docstring | ||||
|     @can_return_tuple | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
| @ -1152,6 +1187,7 @@ class MetaClip2VisionModelWithProjection(MetaClip2PreTrainedModel): | ||||
|         return self.vision_model.embeddings.patch_embedding | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
| @ -1218,7 +1254,8 @@ class MetaClip2ForImageClassification(MetaClip2PreTrainedModel): | ||||
|         # Initialize weights and apply final processing | ||||
|         self.post_init() | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @check_model_inputs() | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|  | ||||
| @ -3,8 +3,9 @@ from typing import Optional | ||||
| import torch | ||||
| from torch import nn | ||||
|  | ||||
| from ...masking_utils import create_causal_mask | ||||
| from ...modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask | ||||
| from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling | ||||
| from ...modeling_utils import PreTrainedModel | ||||
| from ...processing_utils import Unpack | ||||
| from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging | ||||
| from ...utils.generic import check_model_inputs | ||||
| @ -12,9 +13,9 @@ from ..clip.configuration_clip import CLIPConfig, CLIPTextConfig, CLIPVisionConf | ||||
| from ..clip.modeling_clip import ( | ||||
|     CLIPMLP, | ||||
|     CLIPAttention, | ||||
|     CLIPEncoderLayer, | ||||
|     CLIPForImageClassification, | ||||
|     CLIPModel, | ||||
|     CLIPPreTrainedModel, | ||||
|     CLIPTextEmbeddings, | ||||
|     CLIPTextModel, | ||||
|     CLIPTextModelWithProjection, | ||||
| @ -213,9 +214,24 @@ class MetaClip2MLP(CLIPMLP): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class MetaClip2EncoderLayer(CLIPEncoderLayer): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| @auto_docstring | ||||
| class MetaClip2PreTrainedModel(CLIPPreTrainedModel): | ||||
| class MetaClip2PreTrainedModel(PreTrainedModel): | ||||
|     config: MetaClip2Config | ||||
|     base_model_prefix = "metaclip_2" | ||||
|     input_modalities = ["image", "text"] | ||||
|     supports_gradient_checkpointing = True | ||||
|     _supports_sdpa = True | ||||
|     _supports_flash_attn = True | ||||
|     _supports_flex_attn = True | ||||
|     _supports_attention_backend = True | ||||
|     _can_record_outputs = { | ||||
|         "hidden_states": MetaClip2EncoderLayer, | ||||
|         "attentions": MetaClip2Attention, | ||||
|     } | ||||
|  | ||||
|     def _init_weights(self, module): | ||||
|         """Initialize the weights""" | ||||
| @ -275,12 +291,14 @@ class MetaClip2PreTrainedModel(CLIPPreTrainedModel): | ||||
|  | ||||
|  | ||||
| class MetaClip2TextTransformer(CLIPTextTransformer): | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         input_ids, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         position_ids: Optional[torch.Tensor] = None, | ||||
|         use_cache: Optional[bool] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> BaseModelOutputWithPooling: | ||||
|         input_shape = input_ids.size() | ||||
| @ -288,19 +306,21 @@ class MetaClip2TextTransformer(CLIPTextTransformer): | ||||
|  | ||||
|         hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids) | ||||
|  | ||||
|         attention_mask = create_causal_mask( | ||||
|             config=self.config, | ||||
|             input_embeds=hidden_states, | ||||
|             attention_mask=attention_mask, | ||||
|             cache_position=torch.arange(hidden_states.shape[1], device=hidden_states.device), | ||||
|             past_key_values=None, | ||||
|         # CLIP's text model uses causal mask, prepare it here. | ||||
|         # https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324 | ||||
|         causal_attention_mask = _create_4d_causal_attention_mask( | ||||
|             input_shape, hidden_states.dtype, device=hidden_states.device | ||||
|         ) | ||||
|  | ||||
|         kwargs.pop("is_causal", None) | ||||
|         # expand attention_mask | ||||
|         if attention_mask is not None and self.config._attn_implementation != "flash_attention_2": | ||||
|             # [batch_size, seq_len] -> [batch_size, 1, tgt_seq_len, src_seq_len] | ||||
|             attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype) | ||||
|  | ||||
|         encoder_outputs: BaseModelOutput = self.encoder( | ||||
|             inputs_embeds=hidden_states, | ||||
|             attention_mask=attention_mask, | ||||
|             is_causal=True, | ||||
|             causal_attention_mask=causal_attention_mask, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
| @ -352,13 +372,22 @@ class MetaClip2TextModel(CLIPTextModel): | ||||
|     >>> pooled_output = outputs.pooler_output  # pooled (EOS token) states | ||||
|     ```""" | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     def __init__(self, config: MetaClip2TextConfig): | ||||
|         super().__init__(config) | ||||
|         self.text_model = MetaClip2TextTransformer(config) | ||||
|         # Initialize weights and apply final processing | ||||
|         self.post_init() | ||||
|  | ||||
|     @check_model_inputs() | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         input_ids: Optional[torch.Tensor] = None, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         position_ids: Optional[torch.Tensor] = None, | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ): | ||||
|         r""" | ||||
| @ -380,6 +409,8 @@ class MetaClip2TextModel(CLIPTextModel): | ||||
|             input_ids=input_ids, | ||||
|             attention_mask=attention_mask, | ||||
|             position_ids=position_ids, | ||||
|             output_attentions=output_attentions, | ||||
|             output_hidden_states=output_hidden_states, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
| @ -415,13 +446,24 @@ class MetaClip2TextModelWithProjection(CLIPTextModelWithProjection): | ||||
|     >>> text_embeds = outputs.text_embeds | ||||
|     ```""" | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @auto_docstring | ||||
|     def __init__(self, config: MetaClip2TextConfig): | ||||
|         super().__init__(config) | ||||
|  | ||||
|         text_model = MetaClip2TextModel._from_config(config) | ||||
|         self.text_model = text_model.text_model | ||||
|  | ||||
|         self.text_projection = nn.Linear(config.hidden_size, config.projection_dim, bias=False) | ||||
|  | ||||
|         # Initialize weights and apply final processing | ||||
|         self.post_init() | ||||
|  | ||||
|     def forward( | ||||
|         self, | ||||
|         input_ids: Optional[torch.Tensor] = None, | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         position_ids: Optional[torch.Tensor] = None, | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ): | ||||
|         r""" | ||||
| @ -442,6 +484,8 @@ class MetaClip2TextModelWithProjection(CLIPTextModelWithProjection): | ||||
|             input_ids=input_ids, | ||||
|             attention_mask=attention_mask, | ||||
|             position_ids=position_ids, | ||||
|             output_attentions=output_attentions, | ||||
|             output_hidden_states=output_hidden_states, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
| @ -506,8 +550,6 @@ class MetaClip2Model(CLIPModel): | ||||
|         # Initialize weights and apply final processing | ||||
|         self.post_init() | ||||
|  | ||||
|     @can_return_tuple | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         input_ids: Optional[torch.LongTensor] = None, | ||||
| @ -652,7 +694,7 @@ class MetaClip2VisionModel(CLIPVisionModel): | ||||
|     ```""" | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @auto_docstring | ||||
|     @can_return_tuple | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
| @ -722,8 +764,6 @@ class MetaClip2VisionModelWithProjection(CLIPVisionModelWithProjection): | ||||
|     >>> image_embeds = outputs.image_embeds | ||||
|     ```""" | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
|  | ||||
| @ -25,12 +25,12 @@ import torch | ||||
| import torch.nn as nn | ||||
|  | ||||
| from ...activations import ACT2FN | ||||
| from ...modeling_flash_attention_utils import FlashAttentionKwargs | ||||
| from ...modeling_layers import GradientCheckpointingLayer | ||||
| from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling | ||||
| from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel | ||||
| from ...processing_utils import Unpack | ||||
| from ...utils import TransformersKwargs, auto_docstring, torch_int | ||||
| from ...utils.generic import check_model_inputs | ||||
| from .configuration_mlcd import MLCDVisionConfig | ||||
|  | ||||
|  | ||||
| @ -259,7 +259,7 @@ class MLCDAttention(nn.Module): | ||||
|         hidden_states: torch.Tensor, | ||||
|         position_embeddings: tuple[torch.Tensor, torch.Tensor], | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|         **kwargs: Unpack[FlashAttentionKwargs], | ||||
|     ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: | ||||
|         """Input shape: Batch x Time x Channel""" | ||||
|         batch_size, seq_length = hidden_states.shape[:-1] | ||||
| @ -316,7 +316,7 @@ class MLCDEncoderLayer(GradientCheckpointingLayer): | ||||
|         hidden_states: torch.Tensor, | ||||
|         position_embeddings: tuple[torch.Tensor, torch.Tensor], | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|         output_attentions: Optional[bool] = False, | ||||
|     ) -> tuple[torch.FloatTensor]: | ||||
|         """ | ||||
|         Args: | ||||
| @ -328,15 +328,18 @@ class MLCDEncoderLayer(GradientCheckpointingLayer): | ||||
|                 Represents absolute positional embeddings for the query and key in the attention mechanism. | ||||
|             attention_mask (`torch.FloatTensor`): | ||||
|                 Attention mask of shape `(batch, 1, q_len, k_v_seq_len)` where padding elements are indicated by very large negative values. | ||||
|             output_attentions (`bool`, *optional*, defaults to `False`): | ||||
|                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under | ||||
|                 returned tensors for more detail. | ||||
|         """ | ||||
|         residual = hidden_states | ||||
|  | ||||
|         hidden_states = self.layer_norm1(hidden_states) | ||||
|         hidden_states, _ = self.self_attn( | ||||
|         hidden_states, attn_weights = self.self_attn( | ||||
|             hidden_states=hidden_states, | ||||
|             position_embeddings=position_embeddings, | ||||
|             attention_mask=attention_mask, | ||||
|             **kwargs, | ||||
|             output_attentions=output_attentions, | ||||
|         ) | ||||
|         hidden_states = residual + hidden_states | ||||
|  | ||||
| @ -345,7 +348,12 @@ class MLCDEncoderLayer(GradientCheckpointingLayer): | ||||
|         hidden_states = self.mlp(hidden_states) | ||||
|         hidden_states = residual + hidden_states | ||||
|  | ||||
|         return hidden_states | ||||
|         outputs = (hidden_states,) | ||||
|  | ||||
|         if output_attentions: | ||||
|             outputs += (attn_weights,) | ||||
|  | ||||
|         return outputs | ||||
|  | ||||
|  | ||||
| class MLCDEncoder(nn.Module): | ||||
| @ -369,7 +377,9 @@ class MLCDEncoder(nn.Module): | ||||
|         inputs_embeds: torch.FloatTensor, | ||||
|         position_embeddings: tuple[torch.Tensor, torch.Tensor], | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         return_dict: Optional[bool] = None, | ||||
|     ) -> Union[tuple, BaseModelOutput]: | ||||
|         r""" | ||||
|         Args: | ||||
| @ -385,18 +395,114 @@ class MLCDEncoder(nn.Module): | ||||
|                 - 1 for tokens that are **not masked**, | ||||
|                 - 0 for tokens that are **masked**. | ||||
|                 [What are attention masks?](../glossary#attention-mask) | ||||
|             output_attentions (`bool`, *optional*): | ||||
|                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under | ||||
|                 returned tensors for more detail. | ||||
|             output_hidden_states (`bool`, *optional*): | ||||
|                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors | ||||
|                 for more detail. | ||||
|             return_dict (`bool`, *optional*): | ||||
|                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. | ||||
|         """ | ||||
|  | ||||
|         output_hidden_states = ( | ||||
|             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states | ||||
|         ) | ||||
|         return_dict = return_dict if return_dict is not None else self.config.use_return_dict | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|  | ||||
|         encoder_states = () if output_hidden_states else None | ||||
|         all_attentions = () if output_attentions else None | ||||
|  | ||||
|         hidden_states = inputs_embeds | ||||
|         for encoder_layer in self.layers: | ||||
|             hidden_states = encoder_layer( | ||||
|                 hidden_states, | ||||
|                 position_embeddings, | ||||
|                 attention_mask, | ||||
|                 **kwargs, | ||||
|         for idx, encoder_layer in enumerate(self.layers): | ||||
|             if output_hidden_states: | ||||
|                 encoder_states = encoder_states + (hidden_states,) | ||||
|             layer_outputs = encoder_layer( | ||||
|                 hidden_states=hidden_states, | ||||
|                 position_embeddings=position_embeddings, | ||||
|                 attention_mask=attention_mask, | ||||
|                 output_attentions=output_attentions, | ||||
|             ) | ||||
|  | ||||
|             hidden_states = layer_outputs[0] | ||||
|  | ||||
|             if output_attentions: | ||||
|                 all_attentions = all_attentions + (layer_outputs[1],) | ||||
|  | ||||
|         if output_hidden_states: | ||||
|             encoder_states = encoder_states + (hidden_states,) | ||||
|  | ||||
|         if not return_dict: | ||||
|             return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None) | ||||
|         return BaseModelOutput( | ||||
|             last_hidden_state=hidden_states, | ||||
|             hidden_states=encoder_states, | ||||
|             attentions=all_attentions, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class MLCDVisionTransformer(nn.Module): | ||||
|     def __init__(self, config: MLCDVisionConfig): | ||||
|         super().__init__() | ||||
|         self.config = config | ||||
|         embed_dim = config.hidden_size | ||||
|  | ||||
|         self.embeddings = MLCDVisionEmbeddings(config) | ||||
|         self.pre_layrnorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) | ||||
|         self.encoder = MLCDEncoder(config) | ||||
|         self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) | ||||
|         self.vision_rotary_embedding = MLCDRotaryEmbedding(config.hidden_size // config.num_attention_heads // 2) | ||||
|         self.class_pos_emb = nn.Parameter(torch.randn(1, config.hidden_size // config.num_attention_heads // 2)) | ||||
|  | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         return_dict: Optional[bool] = None, | ||||
|     ) -> Union[tuple, BaseModelOutputWithPooling]: | ||||
|         output_hidden_states = ( | ||||
|             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states | ||||
|         ) | ||||
|         return_dict = return_dict if return_dict is not None else self.config.use_return_dict | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|  | ||||
|         if pixel_values is None: | ||||
|             raise ValueError("You have to specify pixel_values") | ||||
|  | ||||
|         num_patches_height = pixel_values.shape[-2] // self.config.patch_size | ||||
|         num_patches_width = pixel_values.shape[-1] // self.config.patch_size | ||||
|         rotary_pos_emb = self.vision_rotary_embedding(num_patches_height, num_patches_width) | ||||
|         rotary_pos_emb = rotary_pos_emb.to(self.class_pos_emb.device) | ||||
|         rotary_pos_emb = torch.cat([self.class_pos_emb, rotary_pos_emb], dim=0) | ||||
|         emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1) | ||||
|         position_embeddings = (emb.cos(), emb.sin()) | ||||
|  | ||||
|         hidden_states = self.embeddings(pixel_values) | ||||
|         hidden_states = self.pre_layrnorm(hidden_states) | ||||
|  | ||||
|         encoder_outputs = self.encoder( | ||||
|             inputs_embeds=hidden_states, | ||||
|             position_embeddings=position_embeddings, | ||||
|             output_attentions=output_attentions, | ||||
|             output_hidden_states=output_hidden_states, | ||||
|             return_dict=return_dict, | ||||
|         ) | ||||
|  | ||||
|         last_hidden_state = encoder_outputs[0] | ||||
|         pooled_output = last_hidden_state[:, 0, :] | ||||
|         pooled_output = self.post_layernorm(pooled_output) | ||||
|  | ||||
|         if not return_dict: | ||||
|             return (last_hidden_state, pooled_output) + encoder_outputs[1:] | ||||
|  | ||||
|         return BaseModelOutputWithPooling( | ||||
|             last_hidden_state=last_hidden_state, | ||||
|             pooler_output=pooled_output, | ||||
|             hidden_states=encoder_outputs.hidden_states, | ||||
|             attentions=encoder_outputs.attentions, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @ -405,15 +511,8 @@ class MLCDPreTrainedModel(PreTrainedModel): | ||||
|     config: MLCDVisionConfig | ||||
|     base_model_prefix = "mlcd" | ||||
|     supports_gradient_checkpointing = True | ||||
|     accepts_loss_kwargs = False | ||||
|     _supports_flash_attn = True | ||||
|     _supports_sdpa = True | ||||
|     _supports_flex_attn = True | ||||
|     _supports_attention_backend = True | ||||
|     _can_record_outputs = { | ||||
|         "hidden_states": MLCDEncoderLayer, | ||||
|         "attentions": MLCDAttention, | ||||
|     } | ||||
|  | ||||
|     def _init_weights(self, module): | ||||
|         """Initialize the weights""" | ||||
| @ -447,55 +546,6 @@ class MLCDPreTrainedModel(PreTrainedModel): | ||||
|             module.bias.data.zero_() | ||||
|  | ||||
|  | ||||
| class MLCDVisionTransformer(nn.Module): | ||||
|     def __init__(self, config: MLCDVisionConfig): | ||||
|         super().__init__() | ||||
|         self.config = config | ||||
|         embed_dim = config.hidden_size | ||||
|  | ||||
|         self.embeddings = MLCDVisionEmbeddings(config) | ||||
|         self.pre_layrnorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) | ||||
|         self.encoder = MLCDEncoder(config) | ||||
|         self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) | ||||
|         self.vision_rotary_embedding = MLCDRotaryEmbedding(config.hidden_size // config.num_attention_heads // 2) | ||||
|         self.class_pos_emb = nn.Parameter(torch.randn(1, config.hidden_size // config.num_attention_heads // 2)) | ||||
|  | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> Union[tuple, BaseModelOutputWithPooling]: | ||||
|         if pixel_values is None: | ||||
|             raise ValueError("You have to specify pixel_values") | ||||
|  | ||||
|         num_patches_height = pixel_values.shape[-2] // self.config.patch_size | ||||
|         num_patches_width = pixel_values.shape[-1] // self.config.patch_size | ||||
|         rotary_pos_emb = self.vision_rotary_embedding(num_patches_height, num_patches_width) | ||||
|         rotary_pos_emb = rotary_pos_emb.to(self.class_pos_emb.device) | ||||
|         rotary_pos_emb = torch.cat([self.class_pos_emb, rotary_pos_emb], dim=0) | ||||
|         emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1) | ||||
|         position_embeddings = (emb.cos(), emb.sin()) | ||||
|  | ||||
|         hidden_states = self.embeddings(pixel_values) | ||||
|         hidden_states = self.pre_layrnorm(hidden_states) | ||||
|  | ||||
|         encoder_outputs = self.encoder( | ||||
|             inputs_embeds=hidden_states, | ||||
|             position_embeddings=position_embeddings, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
|         last_hidden_state = encoder_outputs[0] | ||||
|         pooled_output = last_hidden_state[:, 0, :] | ||||
|         pooled_output = self.post_layernorm(pooled_output) | ||||
|  | ||||
|         return BaseModelOutputWithPooling( | ||||
|             last_hidden_state=last_hidden_state, | ||||
|             pooler_output=pooled_output, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @auto_docstring( | ||||
|     custom_intro=""" | ||||
|     The vision model from M_L_C_D without any head or projection on top. | ||||
| @ -516,12 +566,13 @@ class MLCDVisionModel(MLCDPreTrainedModel): | ||||
|     def get_input_embeddings(self) -> nn.Module: | ||||
|         return self.vision_model.embeddings.patch_embedding | ||||
|  | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         return_dict: Optional[bool] = None, | ||||
|     ) -> Union[tuple, BaseModelOutputWithPooling]: | ||||
|         r""" | ||||
|         Example: | ||||
| @ -545,9 +596,17 @@ class MLCDVisionModel(MLCDPreTrainedModel): | ||||
|         >>> print(f"Number of attention layers: {len(outputs.attentions)}") | ||||
|         >>> print(f"Attention shape: {outputs.attentions[0].shape}") | ||||
|         ```""" | ||||
|         output_hidden_states = ( | ||||
|             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states | ||||
|         ) | ||||
|         return_dict = return_dict if return_dict is not None else self.config.use_return_dict | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|  | ||||
|         return self.vision_model( | ||||
|             pixel_values=pixel_values, | ||||
|             **kwargs, | ||||
|             output_attentions=output_attentions, | ||||
|             output_hidden_states=output_hidden_states, | ||||
|             return_dict=return_dict, | ||||
|         ) | ||||
|  | ||||
|  | ||||
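Note: both versions of `MLCDVisionTransformer.forward` above assemble the rotary position embeddings the same way: a learnable class-token row is prepended to the per-patch rotary table, then the half-sized table is duplicated so the `(cos, sin)` pair covers the full head dimension. A self-contained sketch with made-up sizes (not taken from this diff):

```python
import torch

num_patches, half_dim = 4, 8                          # hypothetical: head_dim // 2 == 8
class_pos_emb = torch.randn(1, half_dim)              # learnable slot for the class token
rotary_pos_emb = torch.randn(num_patches, half_dim)   # stand-in for MLCDRotaryEmbedding's output

rotary_pos_emb = torch.cat([class_pos_emb, rotary_pos_emb], dim=0)  # prepend the class-token row
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)           # duplicate to the full head dim
position_embeddings = (emb.cos(), emb.sin())

print(position_embeddings[0].shape)  # torch.Size([5, 16])
```
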
|  | ||||
| @ -19,11 +19,11 @@ import torch | ||||
| import torch.nn as nn | ||||
|  | ||||
| from ...configuration_utils import PreTrainedConfig | ||||
| from ...modeling_flash_attention_utils import FlashAttentionKwargs | ||||
| from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling | ||||
| from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel | ||||
| from ...processing_utils import Unpack | ||||
| from ...utils import TransformersKwargs, auto_docstring, logging | ||||
| from ...utils.generic import check_model_inputs | ||||
| from ...utils import auto_docstring, logging | ||||
| from ..clip.modeling_clip import ( | ||||
|     CLIPMLP, | ||||
|     CLIPAttention, | ||||
| @ -206,7 +206,7 @@ class MLCDAttention(CLIPAttention): | ||||
|         hidden_states: torch.Tensor, | ||||
|         position_embeddings: tuple[torch.Tensor, torch.Tensor], | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|         **kwargs: Unpack[FlashAttentionKwargs], | ||||
|     ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: | ||||
|         batch_size, seq_length = hidden_states.shape[:-1] | ||||
|  | ||||
| @ -258,7 +258,7 @@ class MLCDEncoderLayer(CLIPEncoderLayer): | ||||
|         hidden_states: torch.Tensor, | ||||
|         position_embeddings: tuple[torch.Tensor, torch.Tensor], | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|         output_attentions: Optional[bool] = False, | ||||
|     ) -> tuple[torch.FloatTensor]: | ||||
|         """ | ||||
|         Args: | ||||
| @ -270,15 +270,18 @@ class MLCDEncoderLayer(CLIPEncoderLayer): | ||||
|                 Represents absolute positional embeddings for the query and key in the attention mechanism. | ||||
|             attention_mask (`torch.FloatTensor`): | ||||
|                 Attention mask of shape `(batch, 1, q_len, k_v_seq_len)` where padding elements are indicated by very large negative values. | ||||
|             output_attentions (`bool`, *optional*, defaults to `False`): | ||||
|                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under | ||||
|                 returned tensors for more detail. | ||||
|         """ | ||||
|         residual = hidden_states | ||||
|  | ||||
|         hidden_states = self.layer_norm1(hidden_states) | ||||
|         hidden_states, _ = self.self_attn( | ||||
|         hidden_states, attn_weights = self.self_attn( | ||||
|             hidden_states=hidden_states, | ||||
|             position_embeddings=position_embeddings, | ||||
|             attention_mask=attention_mask, | ||||
|             **kwargs, | ||||
|             output_attentions=output_attentions, | ||||
|         ) | ||||
|         hidden_states = residual + hidden_states | ||||
|  | ||||
| @ -287,7 +290,12 @@ class MLCDEncoderLayer(CLIPEncoderLayer): | ||||
|         hidden_states = self.mlp(hidden_states) | ||||
|         hidden_states = residual + hidden_states | ||||
|  | ||||
|         return hidden_states | ||||
|         outputs = (hidden_states,) | ||||
|  | ||||
|         if output_attentions: | ||||
|             outputs += (attn_weights,) | ||||
|  | ||||
|         return outputs | ||||
|  | ||||
|  | ||||
| class MLCDEncoder(CLIPEncoder): | ||||
| @ -308,7 +316,9 @@ class MLCDEncoder(CLIPEncoder): | ||||
|         inputs_embeds: torch.FloatTensor, | ||||
|         position_embeddings: tuple[torch.Tensor, torch.Tensor], | ||||
|         attention_mask: Optional[torch.Tensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         return_dict: Optional[bool] = None, | ||||
|     ) -> Union[tuple, BaseModelOutput]: | ||||
|         r""" | ||||
|         Args: | ||||
| @ -324,18 +334,107 @@ class MLCDEncoder(CLIPEncoder): | ||||
|                 - 1 for tokens that are **not masked**, | ||||
|                 - 0 for tokens that are **masked**. | ||||
|                 [What are attention masks?](../glossary#attention-mask) | ||||
|             output_attentions (`bool`, *optional*): | ||||
|                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under | ||||
|                 returned tensors for more detail. | ||||
|             output_hidden_states (`bool`, *optional*): | ||||
|                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors | ||||
|                 for more detail. | ||||
|             return_dict (`bool`, *optional*): | ||||
|                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. | ||||
|         """ | ||||
|  | ||||
|         output_hidden_states = ( | ||||
|             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states | ||||
|         ) | ||||
|         return_dict = return_dict if return_dict is not None else self.config.use_return_dict | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|  | ||||
|         encoder_states = () if output_hidden_states else None | ||||
|         all_attentions = () if output_attentions else None | ||||
|  | ||||
|         hidden_states = inputs_embeds | ||||
|         for encoder_layer in self.layers: | ||||
|             hidden_states = encoder_layer( | ||||
|                 hidden_states, | ||||
|                 position_embeddings, | ||||
|                 attention_mask, | ||||
|                 **kwargs, | ||||
|         for idx, encoder_layer in enumerate(self.layers): | ||||
|             if output_hidden_states: | ||||
|                 encoder_states = encoder_states + (hidden_states,) | ||||
|             layer_outputs = encoder_layer( | ||||
|                 hidden_states=hidden_states, | ||||
|                 position_embeddings=position_embeddings, | ||||
|                 attention_mask=attention_mask, | ||||
|                 output_attentions=output_attentions, | ||||
|             ) | ||||
|  | ||||
|             hidden_states = layer_outputs[0] | ||||
|  | ||||
|             if output_attentions: | ||||
|                 all_attentions = all_attentions + (layer_outputs[1],) | ||||
|  | ||||
|         if output_hidden_states: | ||||
|             encoder_states = encoder_states + (hidden_states,) | ||||
|  | ||||
|         if not return_dict: | ||||
|             return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None) | ||||
|         return BaseModelOutput( | ||||
|             last_hidden_state=hidden_states, | ||||
|             hidden_states=encoder_states, | ||||
|             attentions=all_attentions, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class MLCDVisionTransformer(CLIPVisionTransformer): | ||||
|     def __init__(self, config: MLCDVisionConfig): | ||||
|         super().__init__(config) | ||||
|         self.vision_rotary_embedding = MLCDRotaryEmbedding(config.hidden_size // config.num_attention_heads // 2) | ||||
|         self.class_pos_emb = nn.Parameter(torch.randn(1, config.hidden_size // config.num_attention_heads // 2)) | ||||
|  | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         return_dict: Optional[bool] = None, | ||||
|     ) -> Union[tuple, BaseModelOutputWithPooling]: | ||||
|         output_hidden_states = ( | ||||
|             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states | ||||
|         ) | ||||
|         return_dict = return_dict if return_dict is not None else self.config.use_return_dict | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|  | ||||
|         if pixel_values is None: | ||||
|             raise ValueError("You have to specify pixel_values") | ||||
|  | ||||
|         num_patches_height = pixel_values.shape[-2] // self.config.patch_size | ||||
|         num_patches_width = pixel_values.shape[-1] // self.config.patch_size | ||||
|         rotary_pos_emb = self.vision_rotary_embedding(num_patches_height, num_patches_width) | ||||
|         rotary_pos_emb = rotary_pos_emb.to(self.class_pos_emb.device) | ||||
|         rotary_pos_emb = torch.cat([self.class_pos_emb, rotary_pos_emb], dim=0) | ||||
|         emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1) | ||||
|         position_embeddings = (emb.cos(), emb.sin()) | ||||
|  | ||||
|         hidden_states = self.embeddings(pixel_values) | ||||
|         hidden_states = self.pre_layrnorm(hidden_states) | ||||
|  | ||||
|         encoder_outputs = self.encoder( | ||||
|             inputs_embeds=hidden_states, | ||||
|             position_embeddings=position_embeddings, | ||||
|             output_attentions=output_attentions, | ||||
|             output_hidden_states=output_hidden_states, | ||||
|             return_dict=return_dict, | ||||
|         ) | ||||
|  | ||||
|         last_hidden_state = encoder_outputs[0] | ||||
|         pooled_output = last_hidden_state[:, 0, :] | ||||
|         pooled_output = self.post_layernorm(pooled_output) | ||||
|  | ||||
|         if not return_dict: | ||||
|             return (last_hidden_state, pooled_output) + encoder_outputs[1:] | ||||
|  | ||||
|         return BaseModelOutputWithPooling( | ||||
|             last_hidden_state=last_hidden_state, | ||||
|             pooler_output=pooled_output, | ||||
|             hidden_states=encoder_outputs.hidden_states, | ||||
|             attentions=encoder_outputs.attentions, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @ -344,15 +443,8 @@ class MLCDPreTrainedModel(PreTrainedModel): | ||||
|     config: MLCDVisionConfig | ||||
|     base_model_prefix = "mlcd" | ||||
|     supports_gradient_checkpointing = True | ||||
|     accepts_loss_kwargs = False | ||||
|     _supports_flash_attn = True | ||||
|     _supports_sdpa = True | ||||
|     _supports_flex_attn = True | ||||
|     _supports_attention_backend = True | ||||
|     _can_record_outputs = { | ||||
|         "hidden_states": MLCDEncoderLayer, | ||||
|         "attentions": MLCDAttention, | ||||
|     } | ||||
|  | ||||
|     def _init_weights(self, module): | ||||
|         """Initialize the weights""" | ||||
| @ -386,55 +478,14 @@ class MLCDPreTrainedModel(PreTrainedModel): | ||||
|             module.bias.data.zero_() | ||||
|  | ||||
|  | ||||
| class MLCDVisionTransformer(CLIPVisionTransformer): | ||||
|     def __init__(self, config: MLCDVisionConfig): | ||||
|         super().__init__(config) | ||||
|         self.vision_rotary_embedding = MLCDRotaryEmbedding(config.hidden_size // config.num_attention_heads // 2) | ||||
|         self.class_pos_emb = nn.Parameter(torch.randn(1, config.hidden_size // config.num_attention_heads // 2)) | ||||
|  | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|     ) -> Union[tuple, BaseModelOutputWithPooling]: | ||||
|         if pixel_values is None: | ||||
|             raise ValueError("You have to specify pixel_values") | ||||
|  | ||||
|         num_patches_height = pixel_values.shape[-2] // self.config.patch_size | ||||
|         num_patches_width = pixel_values.shape[-1] // self.config.patch_size | ||||
|         rotary_pos_emb = self.vision_rotary_embedding(num_patches_height, num_patches_width) | ||||
|         rotary_pos_emb = rotary_pos_emb.to(self.class_pos_emb.device) | ||||
|         rotary_pos_emb = torch.cat([self.class_pos_emb, rotary_pos_emb], dim=0) | ||||
|         emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1) | ||||
|         position_embeddings = (emb.cos(), emb.sin()) | ||||
|  | ||||
|         hidden_states = self.embeddings(pixel_values) | ||||
|         hidden_states = self.pre_layrnorm(hidden_states) | ||||
|  | ||||
|         encoder_outputs = self.encoder( | ||||
|             inputs_embeds=hidden_states, | ||||
|             position_embeddings=position_embeddings, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
|         last_hidden_state = encoder_outputs[0] | ||||
|         pooled_output = last_hidden_state[:, 0, :] | ||||
|         pooled_output = self.post_layernorm(pooled_output) | ||||
|  | ||||
|         return BaseModelOutputWithPooling( | ||||
|             last_hidden_state=last_hidden_state, | ||||
|             pooler_output=pooled_output, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class MLCDVisionModel(CLIPVisionModel): | ||||
|     @check_model_inputs(tie_last_hidden_states=False) | ||||
|     @auto_docstring | ||||
|     def forward( | ||||
|         self, | ||||
|         pixel_values: Optional[torch.FloatTensor] = None, | ||||
|         **kwargs: Unpack[TransformersKwargs], | ||||
|         output_attentions: Optional[bool] = None, | ||||
|         output_hidden_states: Optional[bool] = None, | ||||
|         return_dict: Optional[bool] = None, | ||||
|     ) -> Union[tuple, BaseModelOutputWithPooling]: | ||||
|         r""" | ||||
|         Example: | ||||
| @ -458,9 +509,17 @@ class MLCDVisionModel(CLIPVisionModel): | ||||
|         >>> print(f"Number of attention layers: {len(outputs.attentions)}") | ||||
|         >>> print(f"Attention shape: {outputs.attentions[0].shape}") | ||||
|         ```""" | ||||
|         output_hidden_states = ( | ||||
|             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states | ||||
|         ) | ||||
|         return_dict = return_dict if return_dict is not None else self.config.use_return_dict | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|  | ||||
|         return self.vision_model( | ||||
|             pixel_values=pixel_values, | ||||
|             **kwargs, | ||||
|             output_attentions=output_attentions, | ||||
|             output_hidden_states=output_hidden_states, | ||||
|             return_dict=return_dict, | ||||
|         ) | ||||
|  | ||||
|  | ||||
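Note: whichever interface the MLCD diff lands on — explicit `output_attentions`/`output_hidden_states`/`return_dict` flags or `**kwargs` routed through `check_model_inputs` — the caller-side usage looks roughly the same. A hedged sketch; the checkpoint name is an assumption for illustration, not taken from this diff:

```python
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, MLCDVisionModel

model_id = "DeepGlint-AI/mlcd-vit-bigG-patch14-448"   # assumed MLCD vision checkpoint
model = MLCDVisionModel.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs, output_hidden_states=True, output_attentions=True)

print(len(outputs.hidden_states))   # one entry per recorded encoder state
print(outputs.attentions[0].shape)  # (batch, num_heads, seq_len, seq_len)
```
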
|  | ||||
| @ -343,12 +343,12 @@ class PLBartEncoder(PLBartPreTrainedModel): | ||||
|         self.max_source_positions = config.max_position_embeddings | ||||
|         embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0 | ||||
|  | ||||
|         self.embed_tokens = PLBartScaledWordEmbedding( | ||||
|             config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale | ||||
|         ) | ||||
|  | ||||
|         if embed_tokens is not None: | ||||
|             self.embed_tokens = embed_tokens | ||||
|         else: | ||||
|             self.embed_tokens = PLBartScaledWordEmbedding( | ||||
|                 config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale | ||||
|             ) | ||||
|             self.embed_tokens.weight = embed_tokens.weight | ||||
|  | ||||
|         self.embed_positions = PLBartLearnedPositionalEmbedding( | ||||
|             config.max_position_embeddings, | ||||
| @ -595,12 +595,12 @@ class PLBartDecoder(PLBartPreTrainedModel): | ||||
|         self.max_target_positions = config.max_position_embeddings | ||||
|         embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0 | ||||
|  | ||||
|         self.embed_tokens = PLBartScaledWordEmbedding( | ||||
|             config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale | ||||
|         ) | ||||
|  | ||||
|         if embed_tokens is not None: | ||||
|             self.embed_tokens = embed_tokens | ||||
|         else: | ||||
|             self.embed_tokens = PLBartScaledWordEmbedding( | ||||
|                 config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale | ||||
|             ) | ||||
|             self.embed_tokens.weight = embed_tokens.weight | ||||
|  | ||||
|         self.embed_positions = PLBartLearnedPositionalEmbedding( | ||||
|             config.max_position_embeddings, | ||||
|  | ||||
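Note: the PLBart hunks above switch between two ways of sharing the caller-supplied `embed_tokens`: reusing the module object directly, or building a fresh `PLBartScaledWordEmbedding` and tying its `weight` to the shared parameter. A minimal sketch of the difference with plain `nn.Embedding` (illustrative only):

```python
import torch.nn as nn

shared = nn.Embedding(10, 4, padding_idx=1)

# Option A: reuse the module object itself.
encoder_embed_tokens = shared

# Option B: build a new module and tie its weight Parameter to the shared one;
# both modules now reference the same tensor, so updates stay in sync.
decoder_embed_tokens = nn.Embedding(10, 4, padding_idx=1)
decoder_embed_tokens.weight = shared.weight

assert decoder_embed_tokens.weight.data_ptr() == shared.weight.data_ptr()
```
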
| @ -1453,6 +1453,8 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2_5_VLPreTrainedModel, GenerationMi | ||||
|         Example: | ||||
|  | ||||
|         ```python | ||||
|         >>> from PIL import Image | ||||
|         >>> import requests | ||||
|         >>> from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration | ||||
|  | ||||
|         >>> model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct") | ||||
| @ -1462,30 +1464,22 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2_5_VLPreTrainedModel, GenerationMi | ||||
|             { | ||||
|                 "role": "user", | ||||
|                 "content": [ | ||||
|                     { | ||||
|                         "type": "image", | ||||
|                         "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg", | ||||
|                     }, | ||||
|                     {"type": "text", "text": "Describe the image."}, | ||||
|                     {"type": "image"}, | ||||
|                     {"type": "text", "text": "What is shown in this image?"}, | ||||
|                 ], | ||||
|             } | ||||
|             }, | ||||
|         ] | ||||
|         >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg" | ||||
|         >>> image = Image.open(requests.get(url, stream=True).raw) | ||||
|  | ||||
|         >>> inputs = processor.apply_chat_template( | ||||
|             messages, | ||||
|             tokenize=True, | ||||
|             add_generation_prompt=True, | ||||
|             return_dict=True, | ||||
|             return_tensors="pt" | ||||
|         ) | ||||
|         >>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) | ||||
|         >>> inputs = processor(text=[text], images=[image], vision_infos=[vision_infos]) | ||||
|  | ||||
|         >>> # Generate | ||||
|         >>> generated_ids = model.generate(**inputs, max_new_tokens=1024) | ||||
|         >>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)] | ||||
|         >>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] | ||||
|         >>> print(output_text) | ||||
|         ``` | ||||
|         """ | ||||
|         >>> generate_ids = model.generate(inputs.input_ids, max_length=30) | ||||
|         >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] | ||||
|         "The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..." | ||||
|         ```""" | ||||
|  | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|         output_hidden_states = ( | ||||
|  | ||||
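Note: of the two docstring variants swapped above, the one built around `apply_chat_template(..., return_dict=True)` trims the prompt ids off each generated sequence before decoding. That slicing idiom in isolation:

```python
prompt_ids = [[1, 2, 3], [1, 2]]
generated = [[1, 2, 3, 9, 8], [1, 2, 7, 6, 5]]   # generate() output starts with the prompt ids

trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(prompt_ids, generated)]
print(trimmed)  # [[9, 8], [7, 6, 5]]
```
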
| @ -684,6 +684,8 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2VLForConditionalGeneration): | ||||
|         Example: | ||||
|  | ||||
|         ```python | ||||
|         >>> from PIL import Image | ||||
|         >>> import requests | ||||
|         >>> from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration | ||||
|  | ||||
|         >>> model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct") | ||||
| @ -693,30 +695,22 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2VLForConditionalGeneration): | ||||
|             { | ||||
|                 "role": "user", | ||||
|                 "content": [ | ||||
|                     { | ||||
|                         "type": "image", | ||||
|                         "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg", | ||||
|                     }, | ||||
|                     {"type": "text", "text": "Describe the image."}, | ||||
|                     {"type": "image"}, | ||||
|                     {"type": "text", "text": "What is shown in this image?"}, | ||||
|                 ], | ||||
|             } | ||||
|             }, | ||||
|         ] | ||||
|         >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg" | ||||
|         >>> image = Image.open(requests.get(url, stream=True).raw) | ||||
|  | ||||
|         >>> inputs = processor.apply_chat_template( | ||||
|             messages, | ||||
|             tokenize=True, | ||||
|             add_generation_prompt=True, | ||||
|             return_dict=True, | ||||
|             return_tensors="pt" | ||||
|         ) | ||||
|         >>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) | ||||
|         >>> inputs = processor(text=[text], images=[image], vision_infos=[vision_infos]) | ||||
|  | ||||
|         >>> # Generate | ||||
|         >>> generated_ids = model.generate(**inputs, max_new_tokens=1024) | ||||
|         >>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)] | ||||
|         >>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] | ||||
|         >>> print(output_text) | ||||
|         ``` | ||||
|         """ | ||||
|         >>> generate_ids = model.generate(inputs.input_ids, max_length=30) | ||||
|         >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] | ||||
|         "The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..." | ||||
|         ```""" | ||||
|  | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|         output_hidden_states = ( | ||||
|  | ||||
| @ -25,7 +25,6 @@ from torch import nn | ||||
| from ...activations import ACT2FN | ||||
| from ...cache_utils import Cache | ||||
| from ...generation import GenerationMixin | ||||
| from ...masking_utils import create_bidirectional_mask | ||||
| from ...modeling_layers import GradientCheckpointingLayer | ||||
| from ...modeling_outputs import BaseModelOutput, ModelOutput | ||||
| from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel | ||||
| @ -775,19 +774,14 @@ class Qwen2AudioForConditionalGeneration(Qwen2AudioPreTrainedModel, GenerationMi | ||||
|                 lengths_expand = audio_feat_lengths.unsqueeze(1).expand(batch_size, max_seq_len) | ||||
|                 # Create mask | ||||
|                 padding_mask = seq_range >= lengths_expand | ||||
|                 audio_attention_mask_2d = (~padding_mask).to(dtype=torch.long, device=audio_feat_lengths.device) | ||||
|  | ||||
|                 dummy_embeds = torch.zeros( | ||||
|                     (batch_size, max_seq_len, 1), | ||||
|                     dtype=inputs_embeds.dtype, | ||||
|                     device=inputs_embeds.device, | ||||
|                 audio_attention_mask_ = padding_mask.view(batch_size, 1, 1, max_seq_len).expand( | ||||
|                     batch_size, 1, max_seq_len, max_seq_len | ||||
|                 ) | ||||
|  | ||||
|                 audio_attention_mask = create_bidirectional_mask( | ||||
|                     config=self.audio_tower.config, | ||||
|                     input_embeds=dummy_embeds, | ||||
|                     attention_mask=audio_attention_mask_2d, | ||||
|                 audio_attention_mask = audio_attention_mask_.to( | ||||
|                     dtype=self.audio_tower.conv1.weight.dtype, device=self.audio_tower.conv1.weight.device | ||||
|                 ) | ||||
|                 audio_attention_mask[audio_attention_mask_] = float("-inf") | ||||
|  | ||||
|                 audio_outputs = self.audio_tower(input_features, attention_mask=audio_attention_mask) | ||||
|                 selected_audio_feature = audio_outputs.last_hidden_state | ||||
|  | ||||
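Note: both branches of the Qwen2Audio hunk above derive the audio-tower mask from per-sample feature lengths; the handwritten branch expands a 2D padding mask into an additive 4D mask whose padded key positions become `-inf`. A self-contained sketch of that construction (sizes made up, dtype/device handling omitted):

```python
import torch

batch_size, max_seq_len = 2, 5
audio_feat_lengths = torch.tensor([5, 3])             # valid feature lengths per sample

seq_range = torch.arange(max_seq_len).unsqueeze(0).expand(batch_size, max_seq_len)
lengths_expand = audio_feat_lengths.unsqueeze(1).expand(batch_size, max_seq_len)
padding_mask = seq_range >= lengths_expand            # True on padded positions

mask_4d = padding_mask.view(batch_size, 1, 1, max_seq_len).expand(
    batch_size, 1, max_seq_len, max_seq_len
)
additive_mask = mask_4d.to(torch.float32).masked_fill(mask_4d, float("-inf"))

print(additive_mask[1, 0, 0])  # tensor([0., 0., 0., -inf, -inf])
```
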
| @ -1348,6 +1348,8 @@ class Qwen2VLForConditionalGeneration(Qwen2VLPreTrainedModel, GenerationMixin): | ||||
|         Example: | ||||
|  | ||||
|         ```python | ||||
|         >>> from PIL import Image | ||||
|         >>> import requests | ||||
|         >>> from transformers import AutoProcessor, Qwen2VLForConditionalGeneration | ||||
|  | ||||
|         >>> model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct") | ||||
| @ -1357,30 +1359,22 @@ class Qwen2VLForConditionalGeneration(Qwen2VLPreTrainedModel, GenerationMixin): | ||||
|             { | ||||
|                 "role": "user", | ||||
|                 "content": [ | ||||
|                     { | ||||
|                         "type": "image", | ||||
|                         "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg", | ||||
|                     }, | ||||
|                     {"type": "text", "text": "Describe the image."}, | ||||
|                     {"type": "image"}, | ||||
|                     {"type": "text", "text": "What is shown in this image?"}, | ||||
|                 ], | ||||
|             } | ||||
|             }, | ||||
|         ] | ||||
|         >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg" | ||||
|         >>> image = Image.open(requests.get(url, stream=True).raw) | ||||
|  | ||||
|         >>> inputs = processor.apply_chat_template( | ||||
|             messages, | ||||
|             tokenize=True, | ||||
|             add_generation_prompt=True, | ||||
|             return_dict=True, | ||||
|             return_tensors="pt" | ||||
|         ) | ||||
|         >>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) | ||||
|         >>> inputs = processor(text=[text], images=[image], vision_infos=[vision_infos]) | ||||
|  | ||||
|         >>> # Generate | ||||
|         >>> generated_ids = model.generate(**inputs, max_new_tokens=1024) | ||||
|         >>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)] | ||||
|         >>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] | ||||
|         >>> print(output_text) | ||||
|         ``` | ||||
|         """ | ||||
|         >>> generate_ids = model.generate(inputs.input_ids, max_length=30) | ||||
|         >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] | ||||
|         "The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..." | ||||
|         ```""" | ||||
|  | ||||
|         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions | ||||
|         output_hidden_states = ( | ||||
|  | ||||
| @ -1369,42 +1369,8 @@ class Qwen3VLForConditionalGeneration(Qwen3VLPreTrainedModel, GenerationMixin): | ||||
|             The temporal, height and width of feature shape of each video in LLM. | ||||
|  | ||||
|         Example: | ||||
|  | ||||
|         ```python | ||||
|         >>> from transformers import AutoProcessor, Qwen3VLForConditionalGeneration | ||||
|  | ||||
|         >>> model = Qwen3VLForConditionalGeneration.from_pretrained("Qwen/Qwen3-VL-8B-Instruct") | ||||
|         >>> processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-8B-Instruct") | ||||
|  | ||||
|         >>> messages = [ | ||||
|             { | ||||
|                 "role": "user", | ||||
|                 "content": [ | ||||
|                     { | ||||
|                         "type": "image", | ||||
|                         "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg", | ||||
|                     }, | ||||
|                     {"type": "text", "text": "Describe the image."}, | ||||
|                 ], | ||||
|             } | ||||
|         ] | ||||
|  | ||||
|         >>> inputs = processor.apply_chat_template( | ||||
|             messages, | ||||
|             tokenize=True, | ||||
|             add_generation_prompt=True, | ||||
|             return_dict=True, | ||||
|             return_tensors="pt" | ||||
|         ) | ||||
|  | ||||
|         >>> # Generate | ||||
|         >>> generated_ids = model.generate(**inputs, max_new_tokens=1024) | ||||
|         >>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)] | ||||
|         >>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] | ||||
|         >>> print(output_text) | ||||
|         ``` | ||||
|             TODO: Add example | ||||
|         """ | ||||
|  | ||||
|         outputs = self.model( | ||||
|             input_ids=input_ids, | ||||
|             pixel_values=pixel_values, | ||||
|  | ||||
| @ -1134,42 +1134,8 @@ class Qwen3VLForConditionalGeneration(Qwen2_5_VLForConditionalGeneration): | ||||
|             The temporal, height and width of feature shape of each video in LLM. | ||||
|  | ||||
|         Example: | ||||
|  | ||||
|         ```python | ||||
|         >>> from transformers import AutoProcessor, Qwen3VLForConditionalGeneration | ||||
|  | ||||
|         >>> model = Qwen3VLForConditionalGeneration.from_pretrained("Qwen/Qwen3-VL-8B-Instruct") | ||||
|         >>> processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-8B-Instruct") | ||||
|  | ||||
|         >>> messages = [ | ||||
|             { | ||||
|                 "role": "user", | ||||
|                 "content": [ | ||||
|                     { | ||||
|                         "type": "image", | ||||
|                         "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg", | ||||
|                     }, | ||||
|                     {"type": "text", "text": "Describe the image."}, | ||||
|                 ], | ||||
|             } | ||||
|         ] | ||||
|  | ||||
|         >>> inputs = processor.apply_chat_template( | ||||
|             messages, | ||||
|             tokenize=True, | ||||
|             add_generation_prompt=True, | ||||
|             return_dict=True, | ||||
|             return_tensors="pt" | ||||
|         ) | ||||
|  | ||||
|         >>> # Generate | ||||
|         >>> generated_ids = model.generate(**inputs, max_new_tokens=1024) | ||||
|         >>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)] | ||||
|         >>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] | ||||
|         >>> print(output_text) | ||||
|         ``` | ||||
|             TODO: Add example | ||||
|         """ | ||||
|  | ||||
|         outputs = self.model( | ||||
|             input_ids=input_ids, | ||||
|             pixel_values=pixel_values, | ||||
| @ -1350,6 +1316,7 @@ class Qwen3VLProcessor(Qwen2VLProcessor): | ||||
|                 video_metadata = videos_inputs.pop("video_metadata") | ||||
|             else: | ||||
|                 video_metadata = videos_inputs["video_metadata"] | ||||
|             video_grid_thw = videos_inputs["video_grid_thw"] | ||||
|         else: | ||||
|             videos_inputs = {} | ||||
|             video_grid_thw = None | ||||
|  | ||||
| @ -157,6 +157,7 @@ class Qwen3VLProcessor(ProcessorMixin): | ||||
|                 video_metadata = videos_inputs.pop("video_metadata") | ||||
|             else: | ||||
|                 video_metadata = videos_inputs["video_metadata"] | ||||
|             video_grid_thw = videos_inputs["video_grid_thw"] | ||||
|         else: | ||||
|             videos_inputs = {} | ||||
|             video_grid_thw = None | ||||
|  | ||||
| @ -206,7 +206,7 @@ class VoxtralProcessor(ProcessorMixin): | ||||
|         tokenizer_kwargs = {**processed_kwargs["template_kwargs"], **text_kwargs} | ||||
|         tokenizer_kwargs["return_tensors"] = None  # let's not return tensors here | ||||
|         tokenize = tokenizer_kwargs.pop("tokenize", False) | ||||
|         return_dict = tokenizer_kwargs.pop("return_dict", True) | ||||
|         return_dict = tokenizer_kwargs.pop("return_dict", False) | ||||
|  | ||||
|         encoded_instruct_inputs = self.tokenizer.apply_chat_template( | ||||
|             conversations, | ||||
|  | ||||
| @ -1603,7 +1603,7 @@ class ProcessorMixin(PushToHubMixin): | ||||
|             conversations = [conversation] | ||||
|  | ||||
|         tokenize = processed_kwargs["template_kwargs"].pop("tokenize", False) | ||||
|         return_dict = processed_kwargs["template_kwargs"].pop("return_dict", True) | ||||
|         return_dict = processed_kwargs["template_kwargs"].pop("return_dict", False) | ||||
|         mm_load_kwargs = processed_kwargs["mm_load_kwargs"] | ||||
|  | ||||
|         if tokenize: | ||||
|  | ||||
| @ -383,10 +383,6 @@ class Mxfp4HfQuantizer(HfQuantizer): | ||||
|  | ||||
|         state_dict = model.state_dict() | ||||
|  | ||||
|         # Get num_local_experts from model config | ||||
|         num_local_experts = getattr(model.config, "num_local_experts", 32) | ||||
|         hidden_size = getattr(model.config, "hidden_size", 2880) | ||||
|  | ||||
|         for name, module in model.named_modules(): | ||||
|             if ( | ||||
|                 isinstance(module, Mxfp4GptOssExperts) | ||||
| @ -396,7 +392,7 @@ class Mxfp4HfQuantizer(HfQuantizer): | ||||
|                 state_dict[f"{name}.gate_up_proj_blocks"] = ( | ||||
|                     module.gate_up_proj.storage.layout.unswizzle_data(module.gate_up_proj.storage.data) | ||||
|                     .transpose(-1, -2) | ||||
|                     .reshape(num_local_experts, -1, 90, 16) | ||||
|                     .reshape(32, -1, 90, 16) | ||||
|                 ) | ||||
|                 state_dict[f"{name}.gate_up_proj_scales"] = ( | ||||
|                     module.gate_up_proj_precision_config.weight_scale.storage.layout.unswizzle_data( | ||||
| @ -406,7 +402,7 @@ class Mxfp4HfQuantizer(HfQuantizer): | ||||
|                 state_dict[f"{name}.down_proj_blocks"] = ( | ||||
|                     module.down_proj.storage.layout.unswizzle_data(module.down_proj.storage.data) | ||||
|                     .transpose(-1, -2) | ||||
|                     .reshape(num_local_experts, hidden_size, 90, -1) | ||||
|                     .reshape(32, 2880, 90, -1) | ||||
|                 ) | ||||
|                 state_dict[f"{name}.down_proj_scales"] = ( | ||||
|                     module.down_proj_precision_config.weight_scale.storage.layout.unswizzle_data( | ||||
|  | ||||
| @ -1378,7 +1378,7 @@ class MistralCommonTokenizer(PushToHubMixin): | ||||
|         truncation: bool = False, | ||||
|         max_length: Optional[int] = None, | ||||
|         return_tensors: Optional[Union[str, TensorType]] = None, | ||||
|         return_dict: bool = True, | ||||
|         return_dict: bool = False, | ||||
|         **kwargs, | ||||
|     ) -> Union[str, list[int], list[str], list[list[int]], BatchEncoding]: | ||||
|         """ | ||||
|  | ||||
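Note: this hunk, like the Voxtral and ProcessorMixin ones above and the tokenizer-level hunks that follow, flips the `return_dict` default of `apply_chat_template`. What the flag changes for a caller, sketched with an arbitrary chat-templated tokenizer (checkpoint name is not taken from this diff):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")  # assumed chat-templated checkpoint
chat = [{"role": "user", "content": "Hello!"}]

ids = tok.apply_chat_template(chat, tokenize=True, return_dict=False)
enc = tok.apply_chat_template(chat, tokenize=True, return_dict=True)

print(type(ids))           # a plain list of token ids
print(sorted(enc.keys()))  # ['attention_mask', 'input_ids']
```
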
| @ -18,8 +18,6 @@ fronting encoding methods) Special token mixing (host the special tokens logic) | ||||
| of output with special method for the Fast tokenizers) | ||||
| """ | ||||
|  | ||||
| from __future__ import annotations | ||||
|  | ||||
| import copy | ||||
| import json | ||||
| import os | ||||
| @ -785,7 +783,7 @@ class BatchEncoding(UserDict): | ||||
|  | ||||
|         return self | ||||
|  | ||||
|     def to(self, device: Union[str, torch.device], *, non_blocking: bool = False) -> BatchEncoding: | ||||
|     def to(self, device: Union[str, "torch.device"], *, non_blocking: bool = False) -> "BatchEncoding": | ||||
|         """ | ||||
|         Send all values to device by calling `v.to(device, non_blocking=non_blocking)` (PyTorch only). | ||||
|  | ||||
| @ -1588,7 +1586,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): | ||||
|         truncation: bool = False, | ||||
|         max_length: Optional[int] = None, | ||||
|         return_tensors: Optional[Union[str, TensorType]] = None, | ||||
|         return_dict: bool = True, | ||||
|         return_dict: bool = False, | ||||
|         return_assistant_tokens_mask: bool = False, | ||||
|         tokenizer_kwargs: Optional[dict[str, Any]] = None, | ||||
|         **kwargs, | ||||
| @ -1661,11 +1659,14 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): | ||||
|             set, will return a dict of tokenizer outputs instead. | ||||
|         """ | ||||
|  | ||||
|         if not tokenize: | ||||
|             return_dict = False  # dicts are only returned by the tokenizer anyway | ||||
|         if return_dict and not tokenize: | ||||
|             raise ValueError( | ||||
|                 "`return_dict=True` is incompatible with `tokenize=False`, because there is no dict " | ||||
|                 "of tokenizer outputs to return." | ||||
|             ) | ||||
|  | ||||
|         if return_assistant_tokens_mask and not (return_dict and tokenize): | ||||
|             raise ValueError("`return_assistant_tokens_mask=True` requires `return_dict=True` and `tokenize=True`") | ||||
|         if return_assistant_tokens_mask and not return_dict: | ||||
|             raise ValueError("`return_assistant_tokens_mask=True` is incompatible with `return_dict=False`") | ||||
|  | ||||
|         if tokenizer_kwargs is None: | ||||
|             tokenizer_kwargs = {} | ||||
| @ -1780,17 +1781,13 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): | ||||
|             ) | ||||
|  | ||||
|         if conversation_history is None or len(conversation_history) == 0: | ||||
|             return self.apply_chat_template( | ||||
|                 [message], add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs | ||||
|             ) | ||||
|             return self.apply_chat_template([message], add_generation_prompt=False, tokenize=True, **kwargs) | ||||
|  | ||||
|         conversation = conversation_history + [message] | ||||
|         tokens = self.apply_chat_template( | ||||
|             conversation, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs | ||||
|         ) | ||||
|         tokens = self.apply_chat_template(conversation, add_generation_prompt=False, tokenize=True, **kwargs) | ||||
|  | ||||
|         prefix_tokens = self.apply_chat_template( | ||||
|             conversation_history, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs | ||||
|             conversation_history, add_generation_prompt=False, tokenize=True, **kwargs | ||||
|         ) | ||||
|         # It's possible that the prefix tokens are not a prefix of the full list of tokens. | ||||
|         # For example, if the prefix is `<s>User: Hi` and the full conversation is `<s>User: Hi</s><s>Assistant: Hello`. | ||||
| @ -1861,11 +1858,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): | ||||
|  | ||||
|         return chat_template | ||||
|  | ||||
|     def parse_response( | ||||
|         self, | ||||
|         response: str | list[str | int | list[int]] | np.ndarray | torch.Tensor, | ||||
|         schema: list | dict | None = None, | ||||
|     ): | ||||
|     def parse_response(self, response: str, schema: Optional[Union[list, dict]] = None): | ||||
|         """ | ||||
|         Converts an output string created by generating text from a model into a parsed message dictionary. | ||||
|         This method is intended for use with chat models, and will read the tokenizer's `response_schema` attribute to | ||||
| @ -1876,29 +1869,16 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): | ||||
|  | ||||
|         Args: | ||||
|             response (`str`): | ||||
|                 The output string generated by the model. This can be either a decoded string or list of strings, | ||||
|                 or token IDs as a list/array. | ||||
|                 The output string generated by the model. This should be the decoded string, not raw tokens. | ||||
|             schema (`Union[list, dict]`, *optional*): | ||||
|                 A response schema that indicates the expected output format and how parsing should be performed. | ||||
|                 If not provided, the tokenizer's `response_schema` attribute will be used. | ||||
|         """ | ||||
|         batched = ( | ||||
|             (isinstance(response, list) and not isinstance(response[0], int)) | ||||
|             or getattr(response, "ndim", 0) > 1  # For torch/numpy tensors | ||||
|         ) | ||||
|  | ||||
|         if schema is None: | ||||
|             if getattr(self, "response_schema", None) is None: | ||||
|                 raise AttributeError("This tokenizer does not have a `response_schema` for parsing chat responses!") | ||||
|             schema = self.response_schema | ||||
|         if batched: | ||||
|             if not (isinstance(response, list) and isinstance(response[0], str)): | ||||
|                 response = self.batch_decode(response) | ||||
|             return [recursive_parse(single_response, schema) for single_response in response] | ||||
|         else: | ||||
|             if not isinstance(response, str): | ||||
|                 response = self.decode(response) | ||||
|             return recursive_parse(response, schema) | ||||
|         return recursive_parse(response, schema) | ||||
|  | ||||
|     @classmethod | ||||
|     def from_pretrained( | ||||
| @ -3883,7 +3863,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): | ||||
|  | ||||
|     def batch_decode( | ||||
|         self, | ||||
|         sequences: Union[list[int], list[list[int]], np.ndarray, torch.Tensor], | ||||
|         sequences: Union[list[int], list[list[int]], np.ndarray, "torch.Tensor"], | ||||
|         skip_special_tokens: bool = False, | ||||
|         clean_up_tokenization_spaces: Optional[bool] = None, | ||||
|         **kwargs, | ||||
| @ -3917,7 +3897,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): | ||||
|  | ||||
|     def decode( | ||||
|         self, | ||||
|         token_ids: Union[int, list[int], np.ndarray, torch.Tensor], | ||||
|         token_ids: Union[int, list[int], np.ndarray, "torch.Tensor"], | ||||
|         skip_special_tokens: bool = False, | ||||
|         clean_up_tokenization_spaces: Optional[bool] = None, | ||||
|         **kwargs, | ||||
|  | ||||
| @ -173,12 +173,12 @@ def recursive_parse( | ||||
|             return parsed_schema | ||||
|         elif isinstance(node_content, dict): | ||||
|             for key, child_node in node_schema.get("properties", {}).items(): | ||||
|                 if "const" in child_node: | ||||
|                     parsed_schema[key] = child_node["const"] | ||||
|                 elif key in node_content: | ||||
|                 if key in node_content: | ||||
|                     parsed_schema[key] = recursive_parse(node_content[key], child_node) | ||||
|                 elif "default" in child_node: | ||||
|                     parsed_schema[key] = child_node["default"] | ||||
|                 else: | ||||
|                     pass | ||||
|             if "additionalProperties" in node_schema: | ||||
|                 for key, value in node_content.items(): | ||||
|                     if key not in node_schema.get("properties", {}): | ||||
|  | ||||
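Note: the `recursive_parse` hunk above changes the order in which a property is filled: a schema-pinned `"const"` wins, then a value present in the model output, then the schema `"default"`. A toy, standalone function that only mirrors that precedence (it is not the library's `recursive_parse` and does not recurse):

```python
def fill_properties(node_content: dict, node_schema: dict) -> dict:
    parsed = {}
    for key, child in node_schema.get("properties", {}).items():
        if "const" in child:              # value pinned by the schema
            parsed[key] = child["const"]
        elif key in node_content:         # value present in the model output
            parsed[key] = node_content[key]
        elif "default" in child:          # fall back to the schema default
            parsed[key] = child["default"]
    return parsed

schema = {"properties": {"role": {"const": "assistant"}, "content": {}, "tool_calls": {"default": []}}}
print(fill_properties({"content": "Hi!"}, schema))
# {'role': 'assistant', 'content': 'Hi!', 'tool_calls': []}
```
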
| @ -47,7 +47,8 @@ PACKAGE_DISTRIBUTION_MAPPING = importlib.metadata.packages_distributions() | ||||
| def _is_package_available(pkg_name: str, return_version: bool = False) -> tuple[bool, str] | bool: | ||||
|     """Check if `pkg_name` exist, and optionally try to get its version""" | ||||
|     spec = importlib.util.find_spec(pkg_name) | ||||
|     package_exists = spec is not None | ||||
|     # the spec might be not None but not importable | ||||
|     package_exists = spec is not None and spec.loader is not None | ||||
|     package_version = "N/A" | ||||
|     if package_exists and return_version: | ||||
|         try: | ||||
|  | ||||
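Note: the `_is_package_available` hunk above adds a check that the spec found actually has a loader. On some setups a leftover directory on `sys.path` (for instance from a partially removed install) can resolve to a namespace-package spec with no loader even though the package is not really usable; the extra check filters that case out. A minimal sketch of the guard (the module name is hypothetical):

```python
import importlib.util

spec = importlib.util.find_spec("some_leftover_dir")           # hypothetical name
package_exists = spec is not None and spec.loader is not None  # False for loader-less specs
print(package_exists)
```
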
| @ -520,6 +520,7 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase): | ||||
|             quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]), | ||||
|         ) | ||||
|         processor = AutoProcessor.from_pretrained(model_id) | ||||
|         assert model.device.type == "cuda", "This test is only supported on CUDA"  # TODO: remove this | ||||
|         # Prepare inputs with no images | ||||
|         inputs = processor(text="Hello, I am", return_tensors="pt").to(torch_device) | ||||
|  | ||||
|  | ||||
| @ -267,7 +267,7 @@ class AyaVisionIntegrationTest(unittest.TestCase): | ||||
|  | ||||
|         EXPECTED_LOGITS = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): [1.6699, 0.6260, 3.2266, 8.5547, 2.209], | ||||
|                 ("xpu", 3): [0.4109, 0.1532, 0.8018, 2.1328, 0.5483], | ||||
|                 # 4-bit | ||||
|                 ("cuda", 7): [0.1097, 0.3481, 3.8340, 9.7969, 2.0488], | ||||
|                 ("cuda", 8): [1.6396, 0.6094, 3.1992, 8.5234, 2.1875], | ||||
| @ -308,7 +308,7 @@ class AyaVisionIntegrationTest(unittest.TestCase): | ||||
|  | ||||
|         expected_outputs = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit sky,\nNature's quiet song.", | ||||
|                 ("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.", | ||||
|                 # 4-bit | ||||
|                 ("cuda", 7): "Sure, here's a haiku for you:\n\nMorning dew sparkles,\nPetals unfold in sunlight,\n", | ||||
|                 ("cuda", 8): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.", | ||||
| @ -474,7 +474,7 @@ class AyaVisionIntegrationTest(unittest.TestCase): | ||||
|         # Batching seems to alter the output slightly, but it is also the case in the original implementation. This seems to be expected: https://github.com/huggingface/transformers/issues/23017#issuecomment-1649630232 | ||||
|         expected_outputs = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene.", | ||||
|                 ("xpu", 3): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest lake.", | ||||
|                 ("cuda", 7): 'Wooden bridge stretches\nMirrored lake below, mountains rise\nPeaceful, serene', | ||||
|                 ("cuda", 8): 'Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene.', | ||||
|             } | ||||
|  | ||||
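Note: the AyaVision test hunks above only update values inside `Expectations` tables, which key expected results by accelerator (device type plus a version/capability number, as the keys suggest). A hedged sketch of how such a table is typically consumed in these tests, assuming `Expectations.get_expectation()` resolves the entry for the current machine:

```python
from transformers.testing_utils import Expectations

expected_outputs = Expectations(
    {
        ("xpu", 3): "Whispers on the breeze, ...",
        ("cuda", 7): "Sure, here's a haiku for you: ...",
        ("cuda", 8): "Whispers on the breeze, ...",
    }
)
expected_output = expected_outputs.get_expectation()  # entry matching the running accelerator
```
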
| @ -16,7 +16,6 @@ | ||||
| import copy | ||||
| import tempfile | ||||
| import unittest | ||||
| import unittest.mock | ||||
| from functools import cached_property | ||||
|  | ||||
| import timeout_decorator  # noqa | ||||
| @ -478,23 +477,6 @@ class BartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin | ||||
|             with torch.no_grad(): | ||||
|                 model(**inputs)[0] | ||||
|  | ||||
|     def test_input_embeddings_support_forward_hook(self): | ||||
|         # Make sure that registering hooks on the input embeddings are indeed called | ||||
|         # in forward. This is necessary for gradient checkpointing in PEFT, see also #41821. | ||||
|         config, inputs_dict = self.model_tester.prepare_config_and_inputs() | ||||
|         for model_class in self.all_model_classes: | ||||
|             model = model_class(config) | ||||
|             model.to(torch_device) | ||||
|             model.eval() | ||||
|  | ||||
|             hook = unittest.mock.MagicMock(return_value=None) | ||||
|             model.get_input_embeddings().register_forward_hook(hook) | ||||
|  | ||||
|             inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class)) | ||||
|             model(**inputs) | ||||
|  | ||||
|             self.assertGreater(hook.call_count, 0) | ||||
|  | ||||
|     @require_torch_fp16 | ||||
|     def test_generate_fp16(self): | ||||
|         config, input_dict = self.model_tester.prepare_config_and_inputs() | ||||
|  | ||||
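The test removed above verified that forward hooks registered on the input embedding layer fire during `forward`, which PEFT's gradient checkpointing depends on. A minimal standalone illustration of that hook mechanism (plain PyTorch with an illustrative toy model, not the Bart test itself) could look like this:

# Sketch: check that a forward hook on an embedding module is called.
import unittest.mock

import torch
import torch.nn as nn

embed = nn.Embedding(num_embeddings=100, embedding_dim=8)
model = nn.Sequential(embed, nn.Linear(8, 4))

hook = unittest.mock.MagicMock(return_value=None)
embed.register_forward_hook(hook)  # invoked as hook(module, inputs, output)

with torch.no_grad():
    model(torch.randint(0, 100, (2, 5)))

assert hook.call_count > 0  # the hook fired during forward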
| @ -18,6 +18,7 @@ import unittest | ||||
| from functools import cached_property | ||||
|  | ||||
| from transformers import BlenderbotTokenizer, BlenderbotTokenizerFast | ||||
| from transformers.testing_utils import require_jinja | ||||
|  | ||||
|  | ||||
| class Blenderbot3BTokenizerTests(unittest.TestCase): | ||||
| @ -50,3 +51,24 @@ class Blenderbot3BTokenizerTests(unittest.TestCase): | ||||
|     def test_3B_tokenization_same_as_parlai_rust_tokenizer(self): | ||||
|         assert self.rust_tokenizer_3b.add_prefix_space | ||||
|         assert self.rust_tokenizer_3b([" Sam", "Sam"]).input_ids == [[5502, 2], [5502, 2]] | ||||
|  | ||||
|     @require_jinja | ||||
|     def test_tokenization_for_chat(self): | ||||
|         tok = self.tokenizer_3b | ||||
|         test_chats = [ | ||||
|             [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], | ||||
|             [ | ||||
|                 {"role": "system", "content": "You are a helpful chatbot."}, | ||||
|                 {"role": "user", "content": "Hello!"}, | ||||
|                 {"role": "assistant", "content": "Nice to meet you."}, | ||||
|             ], | ||||
|             [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], | ||||
|         ] | ||||
|         tokenized_chats = [tok.apply_chat_template(test_chat) for test_chat in test_chats] | ||||
|         expected_tokens = [ | ||||
|             [553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 2], | ||||
|             [553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 228, 3490, 287, 2273, 304, 21, 2], | ||||
|             [3490, 287, 2273, 304, 21, 228, 228, 6950, 8, 2], | ||||
|         ] | ||||
|         for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): | ||||
|             self.assertListEqual(tokenized_chat, expected_tokens) | ||||
|  | ||||
| @ -18,7 +18,7 @@ import unittest | ||||
| from datasets import load_dataset | ||||
|  | ||||
| from transformers import BloomTokenizerFast | ||||
| from transformers.testing_utils import require_tokenizers | ||||
| from transformers.testing_utils import require_jinja, require_tokenizers | ||||
|  | ||||
| from ...test_tokenization_common import TokenizerTesterMixin | ||||
|  | ||||
| @ -137,6 +137,28 @@ class BloomTokenizationTest(TokenizerTesterMixin, unittest.TestCase): | ||||
|         predicted_text = [tokenizer.decode(x, clean_up_tokenization_spaces=False) for x in output_tokens] | ||||
|         self.assertListEqual(predicted_text, input_text) | ||||
|  | ||||
|     @require_jinja | ||||
|     def test_tokenization_for_chat(self): | ||||
|         tokenizer = self.get_rust_tokenizer() | ||||
|         tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}" | ||||
|         test_chats = [ | ||||
|             [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], | ||||
|             [ | ||||
|                 {"role": "system", "content": "You are a helpful chatbot."}, | ||||
|                 {"role": "user", "content": "Hello!"}, | ||||
|                 {"role": "assistant", "content": "Nice to meet you."}, | ||||
|             ], | ||||
|             [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], | ||||
|         ] | ||||
|         tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] | ||||
|         expected_tokens = [ | ||||
|             [5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2], | ||||
|             [5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2, 229126, 427, 11890, 1152, 17, 2], | ||||
|             [229126, 427, 11890, 1152, 17, 2, 59414, 4, 2], | ||||
|         ] | ||||
|         for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): | ||||
|             self.assertListEqual(tokenized_chat, expected_tokens) | ||||
|  | ||||
|     def test_add_prefix_space_fast(self): | ||||
|         tokenizer_w_prefix = self.get_rust_tokenizer(add_prefix_space=True) | ||||
|         tokenizer_wo_prefix = self.get_rust_tokenizer(add_prefix_space=False) | ||||
|  | ||||
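Several of the new `test_tokenization_for_chat` tests assign a minimal Jinja chat template before calling `apply_chat_template`. To see the string such a template produces before tokenization, it can be rendered directly with `jinja2` — a standalone sketch only; the `</s>` eos token value here is illustrative, not necessarily the tokenizer's actual token:

# Sketch: render the minimal chat template used in the tests above with jinja2,
# showing the text that apply_chat_template would then tokenize.
from jinja2 import Template

chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
messages = [
    {"role": "system", "content": "You are a helpful chatbot."},
    {"role": "user", "content": "Hello!"},
]

rendered = Template(chat_template).render(messages=messages, eos_token="</s>")
print(rendered)  # You are a helpful chatbot.</s>Hello!</s>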
| @ -146,6 +146,32 @@ class CohereTokenizationTest(TokenizerTesterMixin, unittest.TestCase): | ||||
|         self.assertGreaterEqual(len(self.tokenizer_class.pretrained_vocab_files_map), 1) | ||||
|         self.assertGreaterEqual(len(list(self.tokenizer_class.pretrained_vocab_files_map.values())[0]), 1) | ||||
|  | ||||
|     @require_jinja | ||||
|     def test_tokenization_for_chat(self): | ||||
|         tokenizer = self.get_rust_tokenizer() | ||||
|         test_chats = [ | ||||
|             [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], | ||||
|             [ | ||||
|                 {"role": "system", "content": "You are a helpful chatbot."}, | ||||
|                 {"role": "user", "content": "Hello!"}, | ||||
|                 {"role": "assistant", "content": "Nice to meet you."}, | ||||
|             ], | ||||
|         ] | ||||
|         tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] | ||||
|         # fmt: off | ||||
|         expected_tokens = [ | ||||
|             [5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, 59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, 45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8], | ||||
|             [5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, | ||||
|             59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, | ||||
|             36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, | ||||
|             45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, | ||||
|             58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 43, 48, 41, 60, 42, 55, 60, 71, 60, 55, 51, 45, 54, 99, 38, | ||||
|             54, 567, 235, 693, 276, 411, 243, 22, 8] | ||||
|         ] | ||||
|         # fmt: on | ||||
|         for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): | ||||
|             self.assertListEqual(tokenized_chat, expected_tokens) | ||||
|  | ||||
|     @require_jinja | ||||
|     def test_tokenization_for_tool_use(self): | ||||
|         tokenizer = self.get_rust_tokenizer() | ||||
|  | ||||
| @ -27,6 +27,7 @@ from transformers.convert_slow_tokenizer import convert_slow_tokenizer | ||||
| from transformers.testing_utils import ( | ||||
|     get_tests_dir, | ||||
|     nested_simplify, | ||||
|     require_jinja, | ||||
|     require_read_token, | ||||
|     require_sentencepiece, | ||||
|     require_tokenizers, | ||||
| @ -427,6 +428,25 @@ class GemmaIntegrationTest(unittest.TestCase): | ||||
|         # a dummy prefix space is not added by the sp_model as it was de-activated | ||||
|         self.assertEqual(tokens, tokenizer.sp_model.encode("▁▁", out_type=str)) | ||||
|  | ||||
|     @require_jinja | ||||
|     def test_tokenization_for_chat(self): | ||||
|         tokenizer = GemmaTokenizer.from_pretrained("hf-internal-testing/dummy-gemma") | ||||
|  | ||||
|         test_chats = [ | ||||
|             [{"role": "user", "content": "Hello!"}], | ||||
|             [ | ||||
|                 {"role": "user", "content": "Hello!"}, | ||||
|                 {"role": "assistant", "content": "Nice to meet you."}, | ||||
|             ], | ||||
|             [{"role": "user", "content": "Hello!"}], | ||||
|         ] | ||||
|         # Matt: The third test case tests the default system message, but if this is ever changed in the | ||||
|         #       class/repo code then that test will fail, and the case will need to be updated. | ||||
|         tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] | ||||
|         expected_tokens = [[235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108], [235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108, 235322, 235371, 571, 235298, 2997, 73786, 105776, 108, 7731, 577, 4664, 692, 35606, 235371, 571, 235298, 615, 73786, 108], [235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108]]  # fmt: skip | ||||
|         for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): | ||||
|             self.assertListEqual(tokenized_chat, expected_tokens) | ||||
|  | ||||
|     def test_save_fast_load_slow(self): | ||||
|         # Ensure that we can save a fast tokenizer and load it as a slow tokenizer | ||||
|         slow_tokenizer = self.tokenizer | ||||
|  | ||||
| @ -499,7 +499,7 @@ class Gemma3IntegrationTest(unittest.TestCase): | ||||
|  | ||||
|         EXPECTED_TEXTS = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with turquoise water and a blue sky in the background. It looks like a'], | ||||
|                 ("xpu", 3): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown and white cow standing on a sandy beach with turquoise water in the background. It looks like a lovely,'], | ||||
|                 ("cuda", (8, 0)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear turquoise water and a blue sky in the background. It looks like'], | ||||
|                 ("cuda", (8, 6)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear blue water and a blue sky in the background. It looks like'], | ||||
|                 ("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with turquoise water and a blue sky in the background. It looks like a'], | ||||
| @ -610,7 +610,7 @@ class Gemma3IntegrationTest(unittest.TestCase): | ||||
|         EXPECTED_NUM_IMAGES = 3  # one for the origin image and two crops of images | ||||
|         EXPECTED_TEXTS = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a bright blue sky with some white clouds in the"], | ||||
|                 ("xpu", 3): ['user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There are clouds in the blue sky above.'], | ||||
|                 ("cuda", 7): [], | ||||
|                 ("cuda", (8, 6)): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a clear blue sky with some white clouds above."], | ||||
|                 ("cuda", (8, 0)): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a blue sky with some white clouds in the background"], | ||||
|  | ||||
| @ -24,9 +24,7 @@ from transformers import ( | ||||
|     is_torch_available, | ||||
| ) | ||||
| from transformers.testing_utils import ( | ||||
|     Expectations, | ||||
|     cleanup, | ||||
|     require_deterministic_for_xpu, | ||||
|     require_flash_attn, | ||||
|     require_torch, | ||||
|     require_torch_gpu, | ||||
| @ -415,7 +413,6 @@ class Glm4vIntegrationTest(unittest.TestCase): | ||||
|         ) | ||||
|  | ||||
|     @slow | ||||
|     @require_deterministic_for_xpu | ||||
|     def test_small_model_integration_test_expand(self): | ||||
|         model = Glm4vForConditionalGeneration.from_pretrained( | ||||
|             "THUDM/GLM-4.1V-9B-Thinking", dtype="auto", device_map="auto" | ||||
| @ -429,23 +426,14 @@ class Glm4vIntegrationTest(unittest.TestCase): | ||||
|  | ||||
|         output = model.generate(**inputs, max_new_tokens=30, do_sample=False, num_beams=2, num_return_sequences=2) | ||||
|  | ||||
|         # fmt: off | ||||
|         EXPECTED_DECODED_TEXTS = Expectations( | ||||
|             { | ||||
|  | ||||
|                 (None, None): ["\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat. Specifically", | ||||
|                                "\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat, specifically" | ||||
|                               ], | ||||
|                 ("xpu", None): ["\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture is not a dog; it's a cat. Specifically, it looks", | ||||
|                                 "\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture is not a dog; it's a cat, specifically a Pallas" | ||||
|                                ], | ||||
|             } | ||||
|         EXPECTED_DECODED_TEXT = [ | ||||
|             "\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat. Specifically", | ||||
|             "\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat, specifically" | ||||
|         ]  # fmt: skip | ||||
|         self.assertEqual( | ||||
|             self.processor.batch_decode(output, skip_special_tokens=True), | ||||
|             EXPECTED_DECODED_TEXT, | ||||
|         ) | ||||
|         # fmt: on | ||||
|         EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation() | ||||
|  | ||||
|         decoded_text = self.processor.batch_decode(output, skip_special_tokens=True) | ||||
|         self.assertEqual(decoded_text, EXPECTED_DECODED_TEXT) | ||||
|  | ||||
|     @slow | ||||
|     def test_small_model_integration_test_batch_wo_image(self): | ||||
|  | ||||
| @ -19,7 +19,7 @@ import unittest | ||||
|  | ||||
| from transformers import AutoTokenizer, GPT2Tokenizer, GPT2TokenizerFast | ||||
| from transformers.models.gpt2.tokenization_gpt2 import VOCAB_FILES_NAMES | ||||
| from transformers.testing_utils import require_tiktoken, require_tokenizers | ||||
| from transformers.testing_utils import require_jinja, require_tiktoken, require_tokenizers | ||||
|  | ||||
| from ...test_tokenization_common import TokenizerTesterMixin | ||||
|  | ||||
| @ -281,6 +281,28 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): | ||||
|                 filtered_sequence = [x for x in filtered_sequence if x is not None] | ||||
|                 self.assertEqual(encoded_sequence, filtered_sequence) | ||||
|  | ||||
|     @require_jinja | ||||
|     def test_tokenization_for_chat(self): | ||||
|         tokenizer = GPT2Tokenizer.from_pretrained(self.tmpdirname) | ||||
|         tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}" | ||||
|         test_chats = [ | ||||
|             [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], | ||||
|             [ | ||||
|                 {"role": "system", "content": "You are a helpful chatbot."}, | ||||
|                 {"role": "user", "content": "Hello!"}, | ||||
|                 {"role": "assistant", "content": "Nice to meet you."}, | ||||
|             ], | ||||
|             [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], | ||||
|         ] | ||||
|         tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] | ||||
|         # fmt: off | ||||
|         expected_tokens = [[20, 1, 20, 10, 20, 4, 3, 10, 20, 10, 20, 3, 0, 20, 20, 20, 0, 10, 20, 20, 20, 6, 20, 1, 6, 20, 20, 20, 3, 0, 0, 1, 20, 20], | ||||
|                           [20, 1, 20, 10, 20, 4, 3, 10, 20, 10, 20, 3, 0, 20, 20, 20, 0, 10, 20, 20, 20, 6, 20, 1, 6, 20, 20, 20, 3, 0, 0, 1, 20, 20, 20, 7, 20, 3, 10, 6, 1, 10, 20, 3, 3, 6, 10, 20, 1, 20, 20, 20], | ||||
|                           [20, 7, 20, 3, 10, 6, 1, 10, 20, 3, 3, 6, 10, 20, 1, 20, 20, 20, 20, 3, 0, 0, 1, 20, 20]] | ||||
|         # fmt: on | ||||
|         for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): | ||||
|             self.assertListEqual(tokenized_chat, expected_tokens) | ||||
|  | ||||
|     @require_tiktoken | ||||
|     def test_tokenization_tiktoken(self): | ||||
|         from tiktoken import encoding_name_for_model | ||||
|  | ||||
| @ -15,7 +15,7 @@ | ||||
| import unittest | ||||
|  | ||||
| from transformers import GPTSw3Tokenizer | ||||
| from transformers.testing_utils import get_tests_dir, require_sentencepiece, require_tokenizers, slow | ||||
| from transformers.testing_utils import get_tests_dir, require_jinja, require_sentencepiece, require_tokenizers, slow | ||||
|  | ||||
| from ...test_tokenization_common import TokenizerTesterMixin | ||||
|  | ||||
| @ -127,3 +127,36 @@ class GPTSw3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): | ||||
|             model_name="AI-Sweden-Models/gpt-sw3-126m", | ||||
|             sequences=sequences, | ||||
|         ) | ||||
|  | ||||
|     @require_jinja | ||||
|     def test_tokenization_for_chat(self): | ||||
|         tokenizer = GPTSw3Tokenizer(SAMPLE_VOCAB) | ||||
|         tokenizer.chat_template = ( | ||||
|             "{{ eos_token }}{{ bos_token }}" | ||||
|             "{% for message in messages %}" | ||||
|             "{% if message['role'] == 'user' %}{{ 'User: ' + message['content']}}" | ||||
|             "{% else %}{{ 'Bot: ' + message['content']}}{% endif %}" | ||||
|             "{{ message['text'] }}{{ bos_token }}" | ||||
|             "{% endfor %}" | ||||
|             "Bot:" | ||||
|         ) | ||||
|         # This is in English, but it's just here to make sure the chat control tokens are being added properly | ||||
|         test_chats = [ | ||||
|             [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], | ||||
|             [ | ||||
|                 {"role": "system", "content": "You are a helpful chatbot."}, | ||||
|                 {"role": "user", "content": "Hello!"}, | ||||
|                 {"role": "assistant", "content": "Nice to meet you."}, | ||||
|             ], | ||||
|             [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], | ||||
|         ] | ||||
|         tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] | ||||
|         # fmt: off | ||||
|         expected_tokens = [ | ||||
|             [2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419], | ||||
|             [2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 575, 541, 419], | ||||
|             [2000, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419] | ||||
|             ] | ||||
|         # fmt: on | ||||
|         for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): | ||||
|             self.assertListEqual(tokenized_chat, expected_tokens) | ||||
|  | ||||
| @ -682,7 +682,7 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase): | ||||
|  | ||||
|         expectations = Expectations( | ||||
|             { | ||||
|                 (None, None): [0.4526, 0.4082], | ||||
|                 (None, None): [[0.4526, 0.4082]], | ||||
|                 ("cuda", 8): [0.4524, 0.4074], | ||||
|             } | ||||
|         ) | ||||
|  | ||||
| @ -227,7 +227,6 @@ class InternVLQwen2IntegrationTest(unittest.TestCase): | ||||
|     def tearDown(self): | ||||
|         cleanup(torch_device, gc_collect=True) | ||||
|  | ||||
|     @require_deterministic_for_xpu | ||||
|     def test_qwen2_small_model_integration_generate(self): | ||||
|         processor = AutoProcessor.from_pretrained(self.small_model_checkpoint) | ||||
|         model = InternVLForConditionalGeneration.from_pretrained( | ||||
| @ -245,16 +244,7 @@ class InternVLQwen2IntegrationTest(unittest.TestCase): | ||||
|             decoded_output = processor.decode( | ||||
|                 generate_ids[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True | ||||
|             ) | ||||
|  | ||||
|         # fmt: off | ||||
|         expected_outputs = Expectations( | ||||
|             { | ||||
|                 (None, None): "The image shows two cats lying on a pink surface, which appears to be a bed or couch.", | ||||
|                 ("xpu", 3): "The image shows two cats lying on a pink blanket. The cat on the left is a tabby", | ||||
|             } | ||||
|         ) | ||||
|         # fmt: on | ||||
|         expected_output = expected_outputs.get_expectation() | ||||
|         expected_output = "The image shows two cats lying on a pink surface, which appears to be a bed or couch." | ||||
|  | ||||
|         self.assertEqual(decoded_output, expected_output) | ||||
|  | ||||
| @ -278,9 +268,9 @@ class InternVLQwen2IntegrationTest(unittest.TestCase): | ||||
|         actual_logits = output.logits[0, -1, :5].cpu() | ||||
|         expected_logits_all = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): torch.tensor([11.9922, 14.7188, 14.3125, 10.6719, 6.9297], dtype=torch.float16), | ||||
|                 ("cuda", 7): torch.tensor([11.9531, 14.7031, 14.2734, 10.6562, 6.9219], dtype=torch.float16), | ||||
|                 ("cuda", 8): torch.tensor([11.9609, 14.7188, 14.2734, 10.6484, 6.9141], dtype=torch.float16), | ||||
|                 ("xpu", 3): torch.tensor([11.7500, 14.7500, 14.1250, 10.5625, 6.7812], dtype=torch.float16), | ||||
|                 ("cuda", 7): torch.tensor([11.9531, 14.7031, 14.2734, 10.6562,  6.9219], dtype=torch.float16), | ||||
|                 ("cuda", 8): torch.tensor([11.9609, 14.7188, 14.2734, 10.6484,  6.9141], dtype=torch.float16), | ||||
|             } | ||||
|         )  # fmt: skip | ||||
|         expected_logits = expected_logits_all.get_expectation() | ||||
| @ -308,7 +298,7 @@ class InternVLQwen2IntegrationTest(unittest.TestCase): | ||||
|  | ||||
|         expected_outputs = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): "Whispers of dawn,\nSilent whispers of night,\nPeace in the stillness.", | ||||
|                 ("xpu", 3): "Whispers of dawn,\nSilent whispers of the night,\nNew day's light.", | ||||
|                 ("cuda", 7): 'Whispers of dawn,\nSilent whispers of night,\nPeace in the stillness.', | ||||
|                 ("cuda", 8): 'Whispers of dawn,\nSilent whispers of night,\nPeace in the stillness.', | ||||
|             } | ||||
| @ -580,7 +570,7 @@ class InternVLQwen2IntegrationTest(unittest.TestCase): | ||||
|         decoded_output = processor.decode(output[1], skip_special_tokens=True) | ||||
|         expected_outputs = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): "user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nA forehand shot", | ||||
|                 ("xpu", 3): "user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nThe man is performing a forehand shot.", | ||||
|                 ("cuda", 7): 'user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nA forehand shot', | ||||
|             } | ||||
|         )  # fmt: skip | ||||
|  | ||||
| @ -18,9 +18,7 @@ import unittest | ||||
|  | ||||
| from transformers import AutoTokenizer, is_torch_available, set_seed | ||||
| from transformers.testing_utils import ( | ||||
|     Expectations, | ||||
|     cleanup, | ||||
|     require_deterministic_for_xpu, | ||||
|     require_read_token, | ||||
|     require_torch, | ||||
|     require_torch_accelerator, | ||||
| @ -172,30 +170,36 @@ class Lfm2MoeIntegrationTest(unittest.TestCase): | ||||
|         input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device) | ||||
|         with torch.no_grad(): | ||||
|             out = model(input_ids).logits.float().cpu() | ||||
|         # fmt: off | ||||
|         # Expected mean on dim = -1 | ||||
|         EXPECTED_MEANS = Expectations( | ||||
|             { | ||||
|                 ("cuda", None): torch.tensor([[-1.3855, -0.5123, -1.3143, -1.2144, -1.0791, -1.2117, -1.4704, -0.7648, -0.6175, -1.2402, -1.1459, -1.0083, -1.0247, -0.8830, -1.5643, -1.7266, -1.6254,]]), | ||||
|                 ("xpu", None): torch.tensor([[-1.3863, -0.4653, -1.3246, -1.3199, -1.0940, -1.2254, -1.4716, -0.8852, -0.5920, -1.2182, -1.1782, -1.0268, -1.0114, -0.8816, -1.5774, -1.7408, -1.6147,]]), | ||||
|             } | ||||
|         EXPECTED_MEAN = torch.tensor( | ||||
|             [ | ||||
|                 [ | ||||
|                     -1.3855, | ||||
|                     -0.5123, | ||||
|                     -1.3143, | ||||
|                     -1.2144, | ||||
|                     -1.0791, | ||||
|                     -1.2117, | ||||
|                     -1.4704, | ||||
|                     -0.7648, | ||||
|                     -0.6175, | ||||
|                     -1.2402, | ||||
|                     -1.1459, | ||||
|                     -1.0083, | ||||
|                     -1.0247, | ||||
|                     -0.8830, | ||||
|                     -1.5643, | ||||
|                     -1.7266, | ||||
|                     -1.6254, | ||||
|                 ] | ||||
|             ] | ||||
|         ) | ||||
|         # fmt: on | ||||
|         EXPECTED_MEAN = EXPECTED_MEANS.get_expectation() | ||||
|         out_mean = out.mean(-1) | ||||
|         torch.testing.assert_close(out_mean, EXPECTED_MEAN, rtol=1e-2, atol=1e-2) | ||||
|         # fmt: off | ||||
|         torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2) | ||||
|         # Expected portion of the logits | ||||
|         EXPECTED_SLICES = Expectations( | ||||
|             { | ||||
|                 ("cuda", None): torch.tensor([-1.2656, 2.4844, 5.5000, -1.3359, -1.3203, -1.3438, 1.9375, 5.8438, -0.6523, -1.2891]), | ||||
|                 ("xpu", None): torch.tensor([-1.2656, 2.4531, 5.4375, -1.3438, -1.3203, -1.3516, 1.9297, 5.7812, -0.6719, -1.3203]), | ||||
|             } | ||||
|         EXPECTED_SLICE = torch.tensor( | ||||
|             [-1.2656, 2.4844, 5.5000, -1.3359, -1.3203, -1.3438, 1.9375, 5.8438, -0.6523, -1.2891] | ||||
|         ) | ||||
|         # fmt: on | ||||
|         EXPECTED_SLICE = EXPECTED_SLICES.get_expectation() | ||||
|         out_slice = out[0, 0, :10] | ||||
|         torch.testing.assert_close(out_slice, EXPECTED_SLICE, rtol=1e-4, atol=1e-4) | ||||
|         torch.testing.assert_close(out[0, 0, :10], EXPECTED_SLICE, rtol=1e-4, atol=1e-4) | ||||
|  | ||||
|     @slow | ||||
|     def test_model_1a8b_generation(self): | ||||
| @ -213,25 +217,13 @@ class Lfm2MoeIntegrationTest(unittest.TestCase): | ||||
|         self.assertEqual(EXPECTED_TEXT_COMPLETION, text) | ||||
|  | ||||
|     @slow | ||||
|     @require_deterministic_for_xpu | ||||
|     def test_model_1a8b_batched_chat_generation(self): | ||||
|         prompts = ["Who are you?", "Complete the text: Lorem ipsum dolor ", "The Meji Restoration in Japan ended"] | ||||
|         # fmt: off | ||||
|         EXPECTED_TEXT_COMPLETIONS = Expectations( | ||||
|             { | ||||
|                 ("cuda", None): ["Who are you?, a language model designed to assist with information and tasks?  \nI am", | ||||
|                                  "Complete the text: Lorem ipsum dolor ipsum dolor ipsum dolor ipsum dolor ipsum dolor", | ||||
|                                  "The Meji Restoration in Japan ended or the Meiji Restoration (1868–1912) marked a pivotal", | ||||
|                                 ], | ||||
|                 ("xpu", None): ['Who are you? (AI) designed to assist?  \nI am an AI assistant developed to', | ||||
|                                 'Complete the text: Lorem ipsum dolor ipsum dolor ipsum dolor ipsum dolor ipsum dolor', | ||||
|                                 'The Meji Restoration in Japan ended**  \n* **Key Event:** The overthrow of the Tokugawa' | ||||
|                                ], | ||||
|             } | ||||
|         ) | ||||
|         # fmt: on | ||||
|         EXPECTED_TEXT_COMPLETION = EXPECTED_TEXT_COMPLETIONS.get_expectation() | ||||
|  | ||||
|         EXPECTED_TEXT_COMPLETIONS = [ | ||||
|             "Who are you?, a language model designed to assist with information and tasks?  \nI am", | ||||
|             "Complete the text: Lorem ipsum dolor ipsum dolor ipsum dolor ipsum dolor ipsum dolor", | ||||
|             "The Meji Restoration in Japan ended or the Meiji Restoration (1868–1912) marked a pivotal", | ||||
|         ] | ||||
|         set_seed(1789) | ||||
|         tokenizer = AutoTokenizer.from_pretrained("LiquidAI/LFM2-8B-A1B", use_fast=False) | ||||
|         model = self.get_model() | ||||
| @ -241,4 +233,4 @@ class Lfm2MoeIntegrationTest(unittest.TestCase): | ||||
|         with torch.no_grad(): | ||||
|             generated_ids = model.generate(**batched_input_ids, max_new_tokens=15, do_sample=False) | ||||
|         text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) | ||||
|         self.assertEqual(EXPECTED_TEXT_COMPLETION, text) | ||||
|         self.assertEqual(EXPECTED_TEXT_COMPLETIONS, text) | ||||
|  | ||||
| @ -32,6 +32,7 @@ from transformers.convert_slow_tokenizer import convert_slow_tokenizer | ||||
| from transformers.testing_utils import ( | ||||
|     get_tests_dir, | ||||
|     nested_simplify, | ||||
|     require_jinja, | ||||
|     require_read_token, | ||||
|     require_sentencepiece, | ||||
|     require_tiktoken, | ||||
| @ -701,6 +702,32 @@ class LlamaIntegrationTest(unittest.TestCase): | ||||
|         with self.assertRaises(ValueError): | ||||
|             tokenizer = LlamaTokenizerFast(SAMPLE_VOCAB, eos_token=None, add_bos_token=True, add_eos_token=True) | ||||
|  | ||||
|     @require_jinja | ||||
|     def test_tokenization_for_chat(self): | ||||
|         tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False) | ||||
|  | ||||
|         test_chats = [ | ||||
|             [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], | ||||
|             [ | ||||
|                 {"role": "system", "content": "You are a helpful chatbot."}, | ||||
|                 {"role": "user", "content": "Hello!"}, | ||||
|                 {"role": "assistant", "content": "Nice to meet you."}, | ||||
|             ], | ||||
|             [{"role": "user", "content": "Hello!"}], | ||||
|         ] | ||||
|         # Matt: The third test case tests the default system message, but if this is ever changed in the | ||||
|         #       class/repo code then that test will fail, and the case will need to be updated. | ||||
|         tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] | ||||
|         # fmt: off | ||||
|         expected_tokens = [ | ||||
|             [1, 29961, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 13563, 7451, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 10994, 29991, 518, 29914, 25580, 29962], | ||||
|             [1, 29961, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 13563, 7451, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 10994, 29991, 518, 29914, 25580, 29962, 20103, 304, 5870, 366, 29889, 29871, 2], | ||||
|             [1, 29961, 25580, 29962, 15043, 29991, 518, 29914, 25580, 29962] | ||||
|         ] | ||||
|         # fmt: on | ||||
|         for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): | ||||
|             self.assertListEqual(tokenized_chat, expected_tokens) | ||||
|  | ||||
|  | ||||
| @require_sentencepiece | ||||
| @require_tokenizers | ||||
|  | ||||
| @ -152,11 +152,10 @@ class LlavaVisionText2TextModelTester: | ||||
|     def prepare_config_and_inputs_for_common(self): | ||||
|         config_and_inputs = self.prepare_config_and_inputs() | ||||
|         config, pixel_values = config_and_inputs | ||||
|  | ||||
|         input_ids = ids_tensor([self.batch_size, self.seq_length], config.text_config.vocab_size - 2) + 2 | ||||
|         attention_mask = torch.ones(input_ids.shape, dtype=torch.long).to(torch_device) | ||||
|         input_ids = ids_tensor([self.batch_size, self.seq_length], config.text_config.vocab_size - 1) + 1 | ||||
|         input_ids[input_ids == config.image_token_index] = self.pad_token_id | ||||
|         input_ids[:, : self.num_image_tokens] = config.image_token_index | ||||
|         attention_mask = input_ids.ne(1).to(torch_device) | ||||
|  | ||||
|         inputs_dict = { | ||||
|             "pixel_values": pixel_values, | ||||
|  | ||||
| @ -275,7 +275,6 @@ class Mistral3IntegrationTest(unittest.TestCase): | ||||
|         self.assertEqual(decoded_output, expected_output) | ||||
|  | ||||
|     @require_read_token | ||||
|     @require_deterministic_for_xpu | ||||
|     def test_mistral3_integration_generate(self): | ||||
|         processor = AutoProcessor.from_pretrained(self.model_checkpoint) | ||||
|         processor.chat_template = processor.chat_template.replace('strftime_now("%Y-%m-%d")', '"2025-06-20"') | ||||
| @ -300,7 +299,7 @@ class Mistral3IntegrationTest(unittest.TestCase): | ||||
|  | ||||
|         expected_outputs = Expectations( | ||||
|             { | ||||
|                 ("xpu", 3): "The image features two tabby cats lying on a pink surface, which appears to be a cushion or", | ||||
|                 ("xpu", 3): "The image features two cats resting on a pink blanket. The cat on the left is a kitten", | ||||
|                 ("cuda", 8): 'The image features two cats lying on a pink surface, which appears to be a couch or a bed', | ||||
|                 ("rocm", (9, 4)): "The image features two cats lying on a pink surface, which appears to be a couch or a bed", | ||||
|                 ("rocm", (9, 5)): "The image features two tabby cats lying on a pink surface, which appears to be a cushion or" | ||||
|  | ||||
| @ -146,7 +146,7 @@ class MLCDVisionModelIntegrationTest(unittest.TestCase): | ||||
|     @slow | ||||
|     def test_inference(self): | ||||
|         model_name = "DeepGlint-AI/mlcd-vit-bigG-patch14-448" | ||||
|         model = MLCDVisionModel.from_pretrained(model_name, attn_implementation="eager").to(torch_device) | ||||
|         model = MLCDVisionModel.from_pretrained(model_name).to(torch_device) | ||||
|         processor = AutoProcessor.from_pretrained(model_name) | ||||
|  | ||||
|         # process single image | ||||
|  | ||||
| @ -547,7 +547,7 @@ class MllamaForConditionalGenerationIntegrationTest(unittest.TestCase): | ||||
|         decoded_output = processor.decode(output[0], skip_special_tokens=True) | ||||
|         expected_outputs = Expectations( | ||||
|                 { | ||||
|                     ("xpu", 3): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever", | ||||
|                     ("xpu", 3): "If I had to write a haiku about my life, I would write:\nLife is a messy tapestry\n Threads of joy and sorrow\nWeft of memories", | ||||
|                     ("cuda", 7): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever", | ||||
|                     ("cuda", 8): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever", | ||||
|                 } | ||||
|  | ||||
| @ -193,15 +193,7 @@ class RecurrentGemmaIntegrationTest(unittest.TestCase): | ||||
|     @require_bitsandbytes | ||||
|     @require_read_token | ||||
|     def test_model_2b_8bit(self): | ||||
|         # fmt: off | ||||
|         EXPECTED_TEXTS = Expectations( | ||||
|             { | ||||
|                 ("xpu", None): ['Hello I am doing a project on the topic of "The impact of the internet on the society" and I am stuck', "Hi today I'm going to show you how to make a simple and easy to make a 3D"], | ||||
|                 (None, None): ['Hello I am doing a project on the topic of "The impact of social media on the society" and I am looking', "Hi today I'm going to show you how to make a simple and easy to make a 3D"], | ||||
|             } | ||||
|         ) | ||||
|         # fmt: on | ||||
|         EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation() | ||||
|         EXPECTED_TEXTS = ['Hello I am doing a project on the topic of "The impact of social media on the society" and I am looking', "Hi today I'm going to show you how to make a simple and easy to make a 3D"]  # fmt: skip | ||||
|  | ||||
|         model = AutoModelForCausalLM.from_pretrained( | ||||
|             "gg-hf/recurrent-gemma-2b-hf", | ||||
| @ -216,7 +208,7 @@ class RecurrentGemmaIntegrationTest(unittest.TestCase): | ||||
|         output = model.generate(**inputs, max_new_tokens=20, do_sample=False) | ||||
|         output_text = tokenizer.batch_decode(output, skip_special_tokens=True) | ||||
|  | ||||
|         self.assertEqual(output_text, EXPECTED_TEXT) | ||||
|         self.assertEqual(output_text, EXPECTED_TEXTS) | ||||
|  | ||||
|     @require_read_token | ||||
|     def test_long_context(self): | ||||
|  | ||||
| @ -23,7 +23,6 @@ from transformers import ( | ||||
|     is_torch_available, | ||||
| ) | ||||
| from transformers.testing_utils import ( | ||||
|     Expectations, | ||||
|     cleanup, | ||||
|     require_torch, | ||||
|     slow, | ||||
| @ -298,15 +297,9 @@ class VoxtralForConditionalGenerationIntegrationTest(unittest.TestCase): | ||||
|         outputs = model.generate(**inputs, do_sample=False, max_new_tokens=500) | ||||
|         decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True) | ||||
|  | ||||
|         # fmt: off | ||||
|         EXPECTED_OUTPUTS = Expectations( | ||||
|             { | ||||
|                 (None, None): ["What can you tell me about this audio?This audio is a farewell address by President Barack Obama, delivered in Chicago. In the speech, he reflects on his eight years in office, highlighting the resilience, hope, and unity of the American people. He acknowledges the diverse perspectives and conversations he had with the public, which kept him honest and inspired. The president also emphasizes the importance of self-government and civic engagement, encouraging Americans to participate in their democracy actively. He expresses optimism about the country's future and looks forward to continuing his work as a citizen. The audio concludes with a heartfelt thank you and a blessing for the United States."], | ||||
|                 ("xpu", None): ["What can you tell me about this audio?This audio is a farewell address by President Barack Obama, delivered in Chicago. In the speech, he reflects on his eight years in office, highlighting the resilience, hope, and unity of the American people. He emphasizes the importance of self-government and active citizenship, encouraging listeners to engage in their communities and participate in democracy. The president expresses his optimism about the country's future and his commitment to continuing to serve as a citizen. He concludes the speech with a heartfelt thank you and a blessing for the United States."], | ||||
|             } | ||||
|         ) | ||||
|         # fmt: on | ||||
|         EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation() | ||||
|         EXPECTED_OUTPUT = [ | ||||
|             "What can you tell me about this audio?This audio is a farewell address by President Barack Obama, delivered in Chicago. In the speech, he reflects on his eight years in office, highlighting the resilience, hope, and unity of the American people. He acknowledges the diverse perspectives and conversations he had with the public, which kept him honest and inspired. The president also emphasizes the importance of self-government and civic engagement, encouraging Americans to participate in their democracy actively. He expresses optimism about the country's future and looks forward to continuing his work as a citizen. The audio concludes with a heartfelt thank you and a blessing for the United States." | ||||
|         ] | ||||
|         self.assertEqual(decoded_outputs, EXPECTED_OUTPUT) | ||||
|  | ||||
|     @slow | ||||
|  | ||||
| @ -35,7 +35,6 @@ from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline | ||||
| from transformers.pipelines.audio_utils import chunk_bytes_iter, ffmpeg_microphone_live | ||||
| from transformers.pipelines.automatic_speech_recognition import chunk_iter | ||||
| from transformers.testing_utils import ( | ||||
|     Expectations, | ||||
|     compare_pipeline_output_to_hub_spec, | ||||
|     is_pipeline_test, | ||||
|     is_torch_available, | ||||
| @ -1444,14 +1443,8 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase): | ||||
|     @slow | ||||
|     def test_whisper_longform(self): | ||||
|         # fmt: off | ||||
|         EXPECTED_RESULTS = Expectations( | ||||
|             { | ||||
|                 (None, None): " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting a classic Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a fisher's shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I. CHEERING AND APPLAUSE Sometimes I startle away, cubside down in the monkey bars of a condemned playground on a super fun site. Get all hept up on goofballs. Rummage that were discarded tag bag of defective toys. Yank out a fist bowl of disembodied doll limbs, toss them on Saturday, Rusty Cargo, container down by the Wharf, and challenge toothless drifters to the godless bughouse lets of tournament that is my segment. MUSIC Meanwhile!", | ||||
|                 ("xpu", None): " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting of classics, Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a Fisher shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I... APPLAUSE Sometimes I... Startle away, upside down on the monkey bars of a condemned playground on a superfund site. Get all heaped up on goofballs, rummaged that would discard a tag bag of defective toys, yank out a fist bowl of disembodied doll limbs, toss them on a stain kid's place mat from a defunct denys, set up a table inside a rusty cargo container down by the Wharf and challenge toothless drifters to the godless bug house blitz of tournament that is my segment.", | ||||
|             } | ||||
|         ) | ||||
|         EXPECTED_RESULT = " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting a classic Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a fisher's shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I. CHEERING AND APPLAUSE Sometimes I startle away, cubside down in the monkey bars of a condemned playground on a super fun site. Get all hept up on goofballs. Rummage that were discarded tag bag of defective toys. Yank out a fist bowl of disembodied doll limbs, toss them on Saturday, Rusty Cargo, container down by the Wharf, and challenge toothless drifters to the godless bughouse lets of tournament that is my segment. MUSIC Meanwhile!" | ||||
|         # fmt: on | ||||
|         EXPECTED_RESULT = EXPECTED_RESULTS.get_expectation() | ||||
|  | ||||
|         processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en") | ||||
|         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en") | ||||
|  | ||||
| @ -12,68 +12,26 @@ | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
|  | ||||
| # Run all tests: RUN_SLOW=1 pytest -v tests/tensor_parallel/test_tensor_parallel.py | ||||
| # Run specific config: RUN_SLOW=1 pytest -v tests/tensor_parallel/test_tensor_parallel.py -k "2Proc" | ||||
| # Run multiple configs: RUN_SLOW=1 pytest -v tests/tensor_parallel/test_tensor_parallel.py -k "2Proc or 4Proc" | ||||
| # Run specific test: RUN_SLOW=1 pytest -v tests/tensor_parallel/test_tensor_parallel.py::TestTensorParallel2Proc::test_model_forward | ||||
| # Run the test: CUDA_VISIBLE_DEVICES=0,1 RUN_SLOW=1 pytest -sv tests/tensor_parallel/test_tensor_parallel.py | ||||
|  | ||||
| import os | ||||
| import tempfile | ||||
| import warnings | ||||
| import textwrap | ||||
|  | ||||
| from safetensors import safe_open | ||||
|  | ||||
| from transformers import AutoModelForCausalLM, AutoTokenizer, is_torch_available | ||||
| from transformers import is_torch_available | ||||
| from transformers.integrations.tensor_parallel import get_packed_weights, repack_weights | ||||
| from transformers.testing_utils import ( | ||||
|     TestCasePlus, | ||||
|     backend_device_count, | ||||
|     get_torch_dist_unique_port, | ||||
|     require_huggingface_hub_greater_or_equal, | ||||
|     require_torch_multi_accelerator, | ||||
|     torch_device, | ||||
|     torchrun, | ||||
| ) | ||||
|  | ||||
|  | ||||
| if is_torch_available(): | ||||
|     import torch | ||||
|     import torch.multiprocessing as mp | ||||
|  | ||||
|  | ||||
| def global_wrapper(rank, func, tp, port, func_args, func_kwargs): | ||||
|     def setup_dist_env(rank, world_size, port): | ||||
|         os.environ["WORLD_SIZE"] = str(world_size) | ||||
|         os.environ["RANK"] = str(rank) | ||||
|         os.environ["LOCAL_RANK"] = str(rank) | ||||
|         os.environ["MASTER_ADDR"] = "localhost" | ||||
|         os.environ["MASTER_PORT"] = str(port) | ||||
|  | ||||
|     world_size = tp | ||||
|     setup_dist_env(rank, world_size, port) | ||||
|  | ||||
|     if torch.cuda.is_available(): | ||||
|         torch.cuda.set_device(rank) | ||||
|         torch.distributed.init_process_group(backend="nccl", rank=rank, world_size=world_size) | ||||
|     else: | ||||
|         torch.distributed.init_process_group(backend="gloo", rank=rank, world_size=world_size) | ||||
|  | ||||
|     func(rank, *func_args, **func_kwargs) | ||||
|  | ||||
|     torch.distributed.barrier() | ||||
|     torch.distributed.destroy_process_group() | ||||
|  | ||||
|  | ||||
| def init_distributed(tp: int): | ||||
|     def _init_distributed(func): | ||||
|         def wrapper(*args, **kwargs): | ||||
|             world_size = tp | ||||
|             port = get_torch_dist_unique_port() | ||||
|             spawn_args = (func, tp, port, args, kwargs) | ||||
|             mp.spawn(global_wrapper, args=spawn_args, nprocs=world_size) | ||||
|  | ||||
|         return wrapper | ||||
|  | ||||
|     return _init_distributed | ||||
|  | ||||
|  | ||||
| class TestTensorParallelUtils(TestCasePlus): | ||||
| @ -105,9 +63,191 @@ class TestTensorParallelUtils(TestCasePlus): | ||||
|         assert torch.allclose(unpacked_weights, original_packed_weights) | ||||
|  | ||||
|  | ||||
| class TestTensorParallel(TestCasePlus): | ||||
|     nproc_per_node = 2 | ||||
|  | ||||
|     def test_model_forward(self): | ||||
|         script_to_run = textwrap.dedent( | ||||
|             """ | ||||
|             import torch | ||||
|             import os | ||||
|             from transformers import AutoModelForCausalLM, AutoTokenizer | ||||
|  | ||||
|             model_id = "JackFram/llama-68m" | ||||
|  | ||||
|             model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", tp_plan="auto") | ||||
|             torch.distributed.barrier() | ||||
|  | ||||
|             has_dtensor = 0 | ||||
|             for name, parameter in model.named_parameters(): | ||||
|                 if isinstance(parameter.data, torch.distributed.tensor.DTensor): | ||||
|                     has_dtensor = 1 | ||||
|                     break | ||||
|  | ||||
|             assert has_dtensor == 1, "TP model must have DTensor" | ||||
|  | ||||
|             tokenizer = AutoTokenizer.from_pretrained(model_id, legacy=False) | ||||
|             prompt = "Can I help" | ||||
|  | ||||
|             inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device) | ||||
|             outputs = model(inputs) | ||||
|  | ||||
|             next_token_logits = outputs[0][:, -1, :] | ||||
|             next_token = torch.argmax(next_token_logits, dim=-1) | ||||
|             response = tokenizer.decode(next_token) | ||||
|             assert response == "with" | ||||
|  | ||||
|             torch.distributed.barrier() | ||||
|             torch.distributed.destroy_process_group() | ||||
|             """ | ||||
|         ) | ||||
|         torchrun(script_to_run, self.nproc_per_node, env=self.get_env()) | ||||
|  | ||||
|     def test_model_backward_pass(self): | ||||
|         script_to_run = textwrap.dedent( | ||||
|             """ | ||||
|             import torch | ||||
|             import os | ||||
|             from transformers import AutoModelForCausalLM | ||||
|             from torch import nn | ||||
|  | ||||
|             model_id = "JackFram/llama-68m" | ||||
|  | ||||
|             model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float32, tp_plan="auto") | ||||
|             torch.distributed.barrier() | ||||
|  | ||||
|             # Dummy forward and backward pass | ||||
|             # Note that loss.backward() will fail if there is a bug in the TP implementation | ||||
|             inputs = torch.randint(0, model.config.vocab_size, (2, 10), device=model.device) | ||||
|             labels = torch.randint(0, model.config.vocab_size, (2, 10), device=model.device) | ||||
|             loss = model(inputs, labels=labels).loss | ||||
|             loss.backward() | ||||
|  | ||||
|             torch.distributed.barrier() | ||||
|             torch.distributed.destroy_process_group() | ||||
|             """ | ||||
|         ) | ||||
|         torchrun(script_to_run, self.nproc_per_node, env=self.get_env()) | ||||
|  | ||||
|     def test_model_generate(self): | ||||
|         script_to_run = textwrap.dedent( | ||||
|             """ | ||||
|             import torch | ||||
|             import os | ||||
|             from transformers import AutoModelForCausalLM, AutoTokenizer | ||||
|  | ||||
|             model_id = "JackFram/llama-68m" | ||||
|  | ||||
|             model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", tp_plan="auto") | ||||
|             torch.distributed.barrier() | ||||
|  | ||||
|             model.forward = torch.compile(model.forward) | ||||
|  | ||||
|             has_dtensor = 0 | ||||
|             for name, parameter in model.named_parameters(): | ||||
|                 if isinstance(parameter.data, torch.distributed.tensor.DTensor): | ||||
|                     has_dtensor = 1 | ||||
|                     break | ||||
|  | ||||
|             assert has_dtensor == 1, "TP model must have DTensor" | ||||
|  | ||||
|             tokenizer = AutoTokenizer.from_pretrained(model_id) | ||||
|             prompt = "Can I help" | ||||
|  | ||||
|             inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device) | ||||
|             outputs = model.generate(inputs, max_new_tokens=10, cache_implementation="static") | ||||
|  | ||||
|             output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True) | ||||
|             assert output_text[0].startswith(prompt), f"Expected output to start with '{prompt}', got '{output_text[0]}'" | ||||
|  | ||||
|             torch.distributed.barrier() | ||||
|             torch.distributed.destroy_process_group() | ||||
|             """ | ||||
|         ) | ||||
|         torchrun(script_to_run, self.nproc_per_node, env=self.get_env()) | ||||
|  | ||||
|     @require_huggingface_hub_greater_or_equal("0.31.4") | ||||
|     def test_model_save(self): | ||||
|         from safetensors import safe_open | ||||
|  | ||||
|         with tempfile.TemporaryDirectory() as tmp_dir: | ||||
|             for is_torchrun in [True, False]: | ||||
|                 script_to_run = textwrap.dedent( | ||||
|                     f""" | ||||
|                     import torch | ||||
|                     import os | ||||
|                     from transformers import AutoModelForCausalLM | ||||
|  | ||||
|                     model_id = "JackFram/llama-68m" | ||||
|                     kwargs = dict() | ||||
|  | ||||
|                     if os.environ.get("RANK", None) is not None: | ||||
|                         kwargs["tp_plan"] = "auto" | ||||
|                         result_dir = "{tmp_dir}/tp" | ||||
|                     else: | ||||
|                         result_dir = "{tmp_dir}/nontp" | ||||
|  | ||||
|                     model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs) | ||||
|                     model.save_pretrained(result_dir) | ||||
|                     """ | ||||
|                 ) | ||||
|                 torchrun(script_to_run, self.nproc_per_node, is_torchrun=is_torchrun, env=self.get_env()) | ||||
|  | ||||
|             non_tp_model_path = os.path.join(tmp_dir, "nontp") | ||||
|             tp_model_path = os.path.join(tmp_dir, "tp") | ||||
|  | ||||
|             for filename in os.listdir(non_tp_model_path): | ||||
|                 if not filename.endswith(".safetensors"): | ||||
|                     continue | ||||
|  | ||||
|                 non_tp_model = safe_open(os.path.join(non_tp_model_path, filename), device="cpu", framework="pt") | ||||
|                 tp_model = safe_open(os.path.join(tp_model_path, filename), device="cpu", framework="pt") | ||||
|                 for non_tp_key in non_tp_model.keys(): | ||||
|                     non_tp_tensor = non_tp_model.get_tensor(non_tp_key) | ||||
|                     tp_tensor = tp_model.get_tensor(non_tp_key) | ||||
|                     assert torch.allclose(non_tp_tensor, tp_tensor), f"Tensor with key: {non_tp_key} does not match" | ||||
|                     del non_tp_tensor, tp_tensor | ||||
|  | ||||
|     def test_custom_tp_plan(self): | ||||
|         script_to_run = textwrap.dedent( | ||||
|             r""" | ||||
|             import re | ||||
|             import torch | ||||
|             from torch.distributed.tensor import DTensor | ||||
|             from transformers import AutoModelForCausalLM | ||||
|  | ||||
|             model_id = "JackFram/llama-68m" | ||||
|             # only shard attentions, but not mlps | ||||
|             tp_plan = { | ||||
|                 "model.layers.*.self_attn.q_proj": "colwise", | ||||
|                 "model.layers.*.self_attn.k_proj": "colwise", | ||||
|                 "model.layers.*.self_attn.v_proj": "colwise", | ||||
|                 "model.layers.*.self_attn.o_proj": "rowwise", | ||||
|             } | ||||
|  | ||||
|             # Use custom tp_plan directly in from_pretrained | ||||
|             model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, tp_plan=tp_plan) | ||||
|  | ||||
|             # Check we can generate with the tp_plan | ||||
|             inputs = torch.randint(100, 200, (1, 10), device=model.device) | ||||
|             out = model.generate(inputs, max_new_tokens=10, do_sample=False) | ||||
|  | ||||
|             # Check only the attentions are sharded | ||||
|             for name, param in model.named_parameters(): | ||||
|                 if re.search(r"\.self_attn\.(q|k|v|o)_proj\.", name): | ||||
|                     assert isinstance(param, DTensor) | ||||
|                 else: | ||||
|                     assert not isinstance(param, DTensor) | ||||
|             """ | ||||
|         ) | ||||
|         torchrun(script_to_run, self.nproc_per_node, env=self.get_env()) | ||||
|  | ||||
|  | ||||
| class TestTensorParallelProperties(TestCasePlus): | ||||
|     def test_tp_plan_property_setter_getter(self): | ||||
|         """Test that tp_plan property can be set and retrieved correctly.""" | ||||
|         from transformers import AutoModelForCausalLM | ||||
|  | ||||
|         model_id = "JackFram/llama-68m" | ||||
|         model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto") | ||||
|  | ||||
| @ -135,6 +275,8 @@ class TestTensorParallelProperties(TestCasePlus): | ||||
|  | ||||
|     def test_tp_plan_validation_invalid_style(self): | ||||
|         """Test that invalid parallel styles are rejected.""" | ||||
|         from transformers import AutoModelForCausalLM | ||||
|  | ||||
|         model_id = "JackFram/llama-68m" | ||||
|         model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto") | ||||
|  | ||||
| @ -147,6 +289,9 @@ class TestTensorParallelProperties(TestCasePlus): | ||||
|  | ||||
|     def test_tp_plan_validation_nonexistent_layer_warning(self): | ||||
|         """Test that warnings are issued for non-existent layer patterns.""" | ||||
|         import warnings | ||||
|  | ||||
|         from transformers import AutoModelForCausalLM | ||||
|  | ||||
|         model_id = "JackFram/llama-68m" | ||||
|         model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto") | ||||
| @ -163,6 +308,10 @@ class TestTensorParallelProperties(TestCasePlus): | ||||
|  | ||||
|     def test_tp_plan_valid_layer_patterns(self): | ||||
|         """Test that valid layer patterns are accepted without warnings.""" | ||||
|         import warnings | ||||
|  | ||||
|         from transformers import AutoModelForCausalLM | ||||
|  | ||||
|         model_id = "JackFram/llama-68m" | ||||
|         model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto") | ||||
|  | ||||
| @ -198,6 +347,8 @@ class TestTensorParallelProperties(TestCasePlus): | ||||
|  | ||||
|     def test_tp_plan_none_handling(self): | ||||
|         """Test that None values are handled correctly.""" | ||||
|         from transformers import AutoModelForCausalLM | ||||
|  | ||||
|         model_id = "JackFram/llama-68m" | ||||
|         model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto") | ||||
|  | ||||
| @ -210,172 +361,6 @@ class TestTensorParallelProperties(TestCasePlus): | ||||
|         self.assertEqual(model.tp_plan, {"model.layers.*.self_attn.q_proj": "colwise"}) | ||||
|  | ||||
|  | ||||
| # ====== TEST FUNCTIONS ====== | ||||
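| # The _test_*_impl functions below run on every rank; the test methods launch them through init_distributed(tp=...) | ||||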
| def _test_model_forward_impl(rank): | ||||
|     """Implementation of test_model_forward for distributed execution.""" | ||||
|     model_id = "JackFram/llama-68m" | ||||
|  | ||||
|     # Sanity check that the distributed environment variables are set (raises KeyError otherwise) | ||||
|     int(os.environ["RANK"]) | ||||
|     int(os.environ["WORLD_SIZE"]) | ||||
|     model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", tp_plan="auto") | ||||
|     torch.distributed.barrier() | ||||
|  | ||||
|     has_dtensor = 0 | ||||
|     for name, parameter in model.named_parameters(): | ||||
|         if isinstance(parameter.data, torch.distributed.tensor.DTensor): | ||||
|             has_dtensor = 1 | ||||
|             break | ||||
|  | ||||
|     assert has_dtensor == 1, "TP model must have DTensor" | ||||
|  | ||||
|     tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False) | ||||
|     prompt = "Can I help" | ||||
|  | ||||
|     inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device) | ||||
|     outputs = model(inputs) | ||||
|  | ||||
|     next_token_logits = outputs[0][:, -1, :] | ||||
|     next_token = torch.argmax(next_token_logits, dim=-1) | ||||
|     response = tokenizer.decode(next_token) | ||||
|     assert response == "with" | ||||
|     print("response:", response) | ||||
|     torch.distributed.barrier() | ||||
|  | ||||
|  | ||||
| def _test_model_backward_pass_impl(rank): | ||||
|     """Implementation of test_model_backward_pass for distributed execution.""" | ||||
|     model_id = "JackFram/llama-68m" | ||||
|  | ||||
|     model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float32, tp_plan="auto") | ||||
|     torch.distributed.barrier() | ||||
|  | ||||
|     # Dummy forward and backward pass | ||||
|     # Note that loss.backward() will fail if there is a bug in the TP implementation | ||||
|     inputs = torch.randint(0, model.config.vocab_size, (2, 10), device=model.device) | ||||
|     labels = torch.randint(0, model.config.vocab_size, (2, 10), device=model.device) | ||||
|     loss = model(inputs, labels=labels).loss | ||||
|     loss.backward() | ||||
|  | ||||
|     torch.distributed.barrier() | ||||
|  | ||||
|  | ||||
| def _test_model_generate_impl(rank): | ||||
|     """Implementation of test_model_generate for distributed execution.""" | ||||
|     model_id = "JackFram/llama-68m" | ||||
|  | ||||
|     # Sanity check that the distributed environment variables are set (raises KeyError otherwise) | ||||
|     int(os.environ["RANK"]) | ||||
|     int(os.environ["WORLD_SIZE"]) | ||||
|  | ||||
|     model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", tp_plan="auto") | ||||
|     torch.distributed.barrier() | ||||
|  | ||||
|     model.forward = torch.compile(model.forward) | ||||
|  | ||||
|     has_dtensor = 0 | ||||
|     for name, parameter in model.named_parameters(): | ||||
|         if isinstance(parameter.data, torch.distributed.tensor.DTensor): | ||||
|             has_dtensor = 1 | ||||
|             break | ||||
|  | ||||
|     assert has_dtensor == 1, "TP model must have DTensor" | ||||
|  | ||||
|     tokenizer = AutoTokenizer.from_pretrained(model_id) | ||||
|     prompt = "Can I help" | ||||
|  | ||||
|     inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device) | ||||
|     outputs = model.generate(inputs, max_new_tokens=10, cache_implementation="static") | ||||
|  | ||||
|     output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True) | ||||
|     assert output_text[0].startswith(prompt), f"Expected output to start with '{prompt}', got '{output_text[0]}'" | ||||
|  | ||||
|     torch.distributed.barrier() | ||||
|  | ||||
|  | ||||
| def _test_model_save_impl(rank, tmp_dir, is_torchrun): | ||||
|     """Implementation of test_model_save for distributed execution.""" | ||||
|     model_id = "JackFram/llama-68m" | ||||
|     kwargs = {} | ||||
|  | ||||
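|     # Under torchrun RANK is set, so load with tensor parallelism; otherwise load and save the plain model | ||||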
|     if os.environ.get("RANK", None) is not None: | ||||
|         kwargs["tp_plan"] = "auto" | ||||
|         result_dir = f"{tmp_dir}/tp" | ||||
|     else: | ||||
|         result_dir = f"{tmp_dir}/nontp" | ||||
|  | ||||
|     model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs) | ||||
|     model.save_pretrained(result_dir) | ||||
|  | ||||
|  | ||||
| class TestTensorParallelBase(TestCasePlus): | ||||
|     """Base class for tensor parallel tests. Subclasses must set nproc_per_node.""" | ||||
|  | ||||
|     nproc_per_node = None | ||||
|  | ||||
|     @require_torch_multi_accelerator | ||||
|     def test_model_forward(self): | ||||
|         if self.nproc_per_node is None: | ||||
|             self.skipTest("nproc_per_node not set") | ||||
|         if backend_device_count(torch_device) < self.nproc_per_node: | ||||
|             self.skipTest(f"Need at least {self.nproc_per_node} devices, have {backend_device_count(torch_device)}") | ||||
|  | ||||
|         init_distributed(tp=self.nproc_per_node)(_test_model_forward_impl)() | ||||
|  | ||||
|     @require_torch_multi_accelerator | ||||
|     def test_model_backward_pass(self): | ||||
|         if self.nproc_per_node is None: | ||||
|             self.skipTest("nproc_per_node not set") | ||||
|         if backend_device_count(torch_device) < self.nproc_per_node: | ||||
|             self.skipTest(f"Need at least {self.nproc_per_node} devices, have {backend_device_count(torch_device)}") | ||||
|  | ||||
|         init_distributed(tp=self.nproc_per_node)(_test_model_backward_pass_impl)() | ||||
|  | ||||
|     @require_torch_multi_accelerator | ||||
|     def test_model_generate(self): | ||||
|         if self.nproc_per_node is None: | ||||
|             self.skipTest("nproc_per_node not set") | ||||
|         if backend_device_count(torch_device) < self.nproc_per_node: | ||||
|             self.skipTest(f"Need at least {self.nproc_per_node} devices, have {backend_device_count(torch_device)}") | ||||
|  | ||||
|         init_distributed(tp=self.nproc_per_node)(_test_model_generate_impl)() | ||||
|  | ||||
|     @require_huggingface_hub_greater_or_equal("0.31.4") | ||||
|     @require_torch_multi_accelerator | ||||
|     def test_model_save(self): | ||||
|         if self.nproc_per_node is None: | ||||
|             self.skipTest("nproc_per_node not set") | ||||
|         if backend_device_count(torch_device) < self.nproc_per_node: | ||||
|             self.skipTest(f"Need at least {self.nproc_per_node} devices, have {backend_device_count(torch_device)}") | ||||
|  | ||||
|         with tempfile.TemporaryDirectory() as tmp_dir: | ||||
|             # First run with TP (distributed) | ||||
|             init_distributed(tp=self.nproc_per_node)(_test_model_save_impl)(tmp_dir, True) | ||||
|  | ||||
|             # Then run without TP (non-distributed) | ||||
|             _test_model_save_impl(0, tmp_dir, False) | ||||
|  | ||||
|             non_tp_model_path = os.path.join(tmp_dir, "nontp") | ||||
|             tp_model_path = os.path.join(tmp_dir, "tp") | ||||
|  | ||||
|             for filename in os.listdir(non_tp_model_path): | ||||
|                 if not filename.endswith(".safetensors"): | ||||
|                     continue | ||||
|  | ||||
|                 non_tp_model = safe_open(os.path.join(non_tp_model_path, filename), device="cpu", framework="pt") | ||||
|                 tp_model = safe_open(os.path.join(tp_model_path, filename), device="cpu", framework="pt") | ||||
|                 for non_tp_key in non_tp_model.keys(): | ||||
|                     non_tp_tensor = non_tp_model.get_tensor(non_tp_key) | ||||
|                     tp_tensor = tp_model.get_tensor(non_tp_key) | ||||
|                     assert torch.allclose(non_tp_tensor, tp_tensor), f"Tensor with key: {non_tp_key} does not match" | ||||
|                     del non_tp_tensor, tp_tensor | ||||
|  | ||||
|  | ||||
| class TestTensorParallel2Proc(TestTensorParallelBase): | ||||
|     """Test tensor parallel with 2 processes.""" | ||||
|  | ||||
|     nproc_per_node = 2 | ||||
|  | ||||
|  | ||||
| class TestTensorParallel4Proc(TestTensorParallelBase): | ||||
|     """Test tensor parallel with 4 processes.""" | ||||
|  | ||||
|     nproc_per_node = 4 | ||||
| @require_torch_multi_accelerator | ||||
| class TestTensorParallelAccelerator(TestTensorParallel): | ||||
|     nproc_per_node = backend_device_count(torch_device) | ||||
|  | ||||
| @ -37,9 +37,8 @@ from transformers.testing_utils import ( | ||||
| from transformers.utils import is_torch_available, is_vision_available | ||||
|  | ||||
|  | ||||
| parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) | ||||
| sys.path.append(os.path.join(parent_dir, "utils")) | ||||
| from fetch_hub_objects_for_ci import url_to_local_path  # noqa: E402 | ||||
| sys.path.append(".") | ||||
| from utils.fetch_hub_objects_for_ci import url_to_local_path | ||||
|  | ||||
|  | ||||
| global_rng = random.Random() | ||||
|  | ||||
| @ -799,9 +799,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|  | ||||
|         # Test 2: | ||||
|         # with tokenize | ||||
|         self.assertEqual( | ||||
|             self.tokenizer.apply_chat_template(conversation, tokenize=True).input_ids, expected_tokenized.tokens | ||||
|         ) | ||||
|         self.assertEqual(self.tokenizer.apply_chat_template(conversation, tokenize=True), expected_tokenized.tokens) | ||||
|  | ||||
|         with self.assertRaises( | ||||
|             ValueError, msg="Kwargs [unk_args] are not supported by `MistralCommonTokenizer.apply_chat_template`." | ||||
| @ -826,7 +824,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|             expected_tokenized.text, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             self.tokenizer.apply_chat_template(conversation, tokenize=True, continue_final_message=True).input_ids, | ||||
|             self.tokenizer.apply_chat_template(conversation, tokenize=True, continue_final_message=True), | ||||
|             expected_tokenized.tokens, | ||||
|         ) | ||||
|  | ||||
| @ -848,7 +846,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|             token_outputs = self.tokenizer.apply_chat_template( | ||||
|                 conversation, tokenize=True, add_generation_prompt=add_generation_prompt | ||||
|             ) | ||||
|             self.assertEqual(token_outputs.input_ids, expected_tokenized.tokens) | ||||
|             self.assertEqual(token_outputs, expected_tokenized.tokens) | ||||
|  | ||||
|         # Test 2: | ||||
|         # with continue_final_message | ||||
| @ -960,16 +958,18 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|                 }, | ||||
|             ] | ||||
|  | ||||
|             output = self.tokenizer.apply_chat_template(conversation).input_ids | ||||
|             output = self.tokenizer.apply_chat_template(conversation, tokenize=True) | ||||
|             self.assertEqual(output, expected_tokenized.tokens) | ||||
|  | ||||
|         output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True) | ||||
|         output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True, return_dict=True) | ||||
|         self.assertEqual(output_dict["input_ids"], expected_tokenized.tokens) | ||||
|         self.assertEqual(len(output_dict["pixel_values"]), len(expected_tokenized.images)) | ||||
|         for o, e in zip(output_dict["pixel_values"], expected_tokenized.images): | ||||
|             self.assertTrue(np.allclose(o, e)) | ||||
|  | ||||
|         output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True, return_tensors="pt") | ||||
|         output_dict = self.tokenizer.apply_chat_template( | ||||
|             conversation, tokenize=True, return_dict=True, return_tensors="pt" | ||||
|         ) | ||||
|         self.assertEqual(output_dict["input_ids"].tolist()[0], expected_tokenized.tokens) | ||||
|         expected_images_pt_tensor = torch.from_numpy(np.stack(expected_tokenized.images)) | ||||
|         self.assertTrue(torch.allclose(output_dict["pixel_values"], expected_images_pt_tensor)) | ||||
| @ -1013,7 +1013,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|                 }, | ||||
|             ] | ||||
|  | ||||
|             output = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True).input_ids | ||||
|             output = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True) | ||||
|             self.assertEqual(output, expected_tokenized.tokens) | ||||
|  | ||||
|         output_dict = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True, return_dict=True) | ||||
| @ -1041,14 +1041,14 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|         # Test 1: | ||||
|         # with truncation | ||||
|         self.assertEqual( | ||||
|             self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=True, max_length=20).input_ids, | ||||
|             self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=True, max_length=20), | ||||
|             expected_tokenized.tokens[:20], | ||||
|         ) | ||||
|  | ||||
|         # Test 2: | ||||
|         # without truncation | ||||
|         self.assertEqual( | ||||
|             self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=False, max_length=20).input_ids, | ||||
|             self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=False, max_length=20), | ||||
|             expected_tokenized.tokens, | ||||
|         ) | ||||
|  | ||||
| @ -1130,7 +1130,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|         ] | ||||
|  | ||||
|         text_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=False) | ||||
|         token_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=True).input_ids | ||||
|         token_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=True) | ||||
|  | ||||
|         self.assertEqual(len(text_outputs), len(token_outputs)) | ||||
|         self.assertEqual(len(text_outputs), len(expected_tokenized)) | ||||
| @ -1202,7 +1202,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|             ChatCompletionRequest.from_openai(ref_conversation) | ||||
|         ) | ||||
|  | ||||
|         output = self.tokenizer.apply_chat_template(conversations, tokenize=True).input_ids | ||||
|         output = self.tokenizer.apply_chat_template(conversations, tokenize=True) | ||||
|         self.assertEqual(output, [expected_tokenized.tokens] * 3) | ||||
|  | ||||
|         output = self.tokenizer.apply_chat_template(conversations, tokenize=True, return_dict=True) | ||||
| @ -1248,9 +1248,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|             for conversation in conversations | ||||
|         ] | ||||
|  | ||||
|         token_outputs = self.tokenizer.apply_chat_template( | ||||
|             conversations, tokenize=True, continue_final_message=True | ||||
|         ).input_ids | ||||
|         token_outputs = self.tokenizer.apply_chat_template(conversations, tokenize=True, continue_final_message=True) | ||||
|  | ||||
|         for output, expected in zip(token_outputs, expected_tokenized): | ||||
|             self.assertEqual(output, expected.tokens) | ||||
| @ -1299,7 +1297,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|             ] | ||||
|             token_outputs = self.tokenizer.apply_chat_template( | ||||
|                 conversations, tokenize=True, add_generation_prompt=add_generation_prompt | ||||
|             ).input_ids | ||||
|             ) | ||||
|             for output, expected in zip(token_outputs, expected_tokenized): | ||||
|                 self.assertEqual(output, expected.tokens) | ||||
|  | ||||
| @ -1333,7 +1331,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|         # with truncation | ||||
|         token_outputs = self.tokenizer.apply_chat_template( | ||||
|             self.fixture_conversations, tokenize=True, truncation=True, max_length=20 | ||||
|         ).input_ids | ||||
|         ) | ||||
|  | ||||
|         for output, expected in zip(token_outputs, self.tokenized_fixture_conversations): | ||||
|             self.assertEqual(output, expected.tokens[:20]) | ||||
| @ -1342,7 +1340,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|         # without truncation | ||||
|         token_outputs = self.tokenizer.apply_chat_template( | ||||
|             self.fixture_conversations, tokenize=True, truncation=False, max_length=20 | ||||
|         ).input_ids | ||||
|         ) | ||||
|         self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations)) | ||||
|         for output, expected in zip(token_outputs, self.tokenized_fixture_conversations): | ||||
|             self.assertEqual(output, expected.tokens) | ||||
| @ -1360,9 +1358,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|         for padding in [True, "max_length", PaddingStrategy.LONGEST, PaddingStrategy.MAX_LENGTH]: | ||||
|             if padding == PaddingStrategy.MAX_LENGTH: | ||||
|                 # No padding if no max length is provided | ||||
|                 token_outputs = self.tokenizer.apply_chat_template( | ||||
|                     self.fixture_conversations, padding=padding, return_dict=False | ||||
|                 ) | ||||
|                 token_outputs = self.tokenizer.apply_chat_template(self.fixture_conversations, padding=padding) | ||||
|                 self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations)) | ||||
|                 for output, expected in zip(token_outputs, self.tokenized_fixture_conversations): | ||||
|                     self.assertEqual(output, expected.tokens) | ||||
| @ -1370,7 +1366,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|             max_length = 20 if padding == PaddingStrategy.MAX_LENGTH else None | ||||
|  | ||||
|             token_outputs = self.tokenizer.apply_chat_template( | ||||
|                 self.fixture_conversations, tokenize=True, padding=padding, max_length=max_length, return_dict=False | ||||
|                 self.fixture_conversations, tokenize=True, padding=padding, max_length=max_length | ||||
|             ) | ||||
|  | ||||
|             if padding != PaddingStrategy.MAX_LENGTH: | ||||
| @ -1394,7 +1390,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|  | ||||
|         for padding in [False, "do_not_pad", PaddingStrategy.DO_NOT_PAD]: | ||||
|             token_outputs = self.tokenizer.apply_chat_template( | ||||
|                 self.fixture_conversations, tokenize=True, padding=padding, return_dict=False | ||||
|                 self.fixture_conversations, tokenize=True, padding=padding | ||||
|             ) | ||||
|             self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations)) | ||||
|             for output, expected in zip(token_outputs, self.tokenized_fixture_conversations): | ||||
| @ -1406,12 +1402,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|         max_length = 20 | ||||
|         for padding in [True, "max_length", PaddingStrategy.LONGEST, PaddingStrategy.MAX_LENGTH]: | ||||
|             token_outputs = self.tokenizer.apply_chat_template( | ||||
|                 self.fixture_conversations, | ||||
|                 tokenize=True, | ||||
|                 truncation=True, | ||||
|                 padding=padding, | ||||
|                 max_length=max_length, | ||||
|                 return_dict=False, | ||||
|                 self.fixture_conversations, tokenize=True, truncation=True, padding=padding, max_length=max_length | ||||
|             ) | ||||
|             self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations)) | ||||
|             for output, expected in zip(token_outputs, self.tokenized_fixture_conversations): | ||||
| @ -1420,12 +1411,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|                 ) | ||||
|         for padding in [False, "do_not_pad", PaddingStrategy.DO_NOT_PAD]: | ||||
|             token_outputs = self.tokenizer.apply_chat_template( | ||||
|                 self.fixture_conversations, | ||||
|                 tokenize=True, | ||||
|                 truncation=True, | ||||
|                 padding=padding, | ||||
|                 max_length=max_length, | ||||
|                 return_dict=False, | ||||
|                 self.fixture_conversations, tokenize=True, truncation=True, padding=padding, max_length=max_length | ||||
|             ) | ||||
|             self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations)) | ||||
|             for output, expected in zip(token_outputs, self.tokenized_fixture_conversations): | ||||
| @ -1435,7 +1421,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|         # Test 1: | ||||
|         # with tokenize | ||||
|         token_outputs = self.tokenizer.apply_chat_template( | ||||
|             self.fixture_conversations, tokenize=True, return_tensors="pt", padding=True, return_dict=False | ||||
|             self.fixture_conversations, tokenize=True, return_tensors="pt", padding=True | ||||
|         ) | ||||
|         self.assertIsInstance(token_outputs, torch.Tensor) | ||||
|         self.assertEqual( | ||||
| @ -1446,7 +1432,7 @@ class TestMistralCommonTokenizer(unittest.TestCase): | ||||
|         # Test 2: | ||||
|         # without tokenize, should ignore return_tensors | ||||
|         token_outputs = self.tokenizer.apply_chat_template( | ||||
|             self.fixture_conversations, tokenize=False, return_tensors="pt", padding=True, return_dict=False | ||||
|             self.fixture_conversations, tokenize=False, return_tensors="pt", padding=True | ||||
|         ) | ||||
|         self.assertEqual(token_outputs, [t.text for t in self.tokenized_fixture_conversations]) | ||||
|  | ||||
|  | ||||
| @ -323,7 +323,7 @@ class TokenizerUtilsTest(unittest.TestCase): | ||||
|         ] | ||||
|  | ||||
|         # First, test the default case, where we encode the whole conversation at once | ||||
|         whole_conversation_tokens = tokenizer.apply_chat_template(conversation, tokenize=True, return_dict=False) | ||||
|         whole_conversation_tokens = tokenizer.apply_chat_template(conversation, tokenize=True) | ||||
|  | ||||
|         # Now, test the message-by-message encoding | ||||
|         tokens = [] | ||||
|  | ||||
| @ -200,40 +200,6 @@ class ChatSchemaParserTest(unittest.TestCase): | ||||
|         tokenizer_parsed_chat = tokenizer.parse_response(model_out) | ||||
|         self.assertEqual(tokenizer_parsed_chat, parsed_chat) | ||||
|  | ||||
|     def test_batched_inputs(self): | ||||
|         tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") | ||||
|         model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n    {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>' | ||||
|         tokenizer.response_schema = cohere_schema | ||||
|         parsed_chat = tokenizer.parse_response(model_out) | ||||
|         self.assertEqual(tokenizer.parse_response([model_out]), [parsed_chat]) | ||||
|         self.assertEqual(tokenizer.parse_response([model_out] * 2), [parsed_chat] * 2) | ||||
|  | ||||
|     def test_token_id_inputs(self): | ||||
|         tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")  # Need an actual tokenizer to encode | ||||
|         model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n    {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>' | ||||
|         tokenizer.response_schema = cohere_schema | ||||
|         parsed_chat = tokenizer.parse_response(model_out) | ||||
|         tokenized_out = tokenizer(model_out).input_ids | ||||
|         self.assertEqual(tokenizer.parse_response(tokenized_out), parsed_chat) | ||||
|         self.assertEqual(tokenizer.parse_response([tokenized_out]), [parsed_chat]) | ||||
|         self.assertEqual(tokenizer.parse_response([tokenized_out] * 2), [parsed_chat] * 2) | ||||
|  | ||||
|     def test_numpy_inputs(self): | ||||
|         tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")  # Need an actual tokenizer to encode | ||||
|         model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n    {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>' | ||||
|         tokenizer.response_schema = cohere_schema | ||||
|         parsed_chat = tokenizer.parse_response(model_out) | ||||
|         tokenized_out = tokenizer(model_out, return_tensors="np").input_ids | ||||
|         self.assertEqual(tokenizer.parse_response(tokenized_out), [parsed_chat]) | ||||
|  | ||||
|     def test_tensor_inputs(self): | ||||
|         tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")  # Need an actual tokenizer to encode | ||||
|         model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n    {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>' | ||||
|         tokenizer.response_schema = cohere_schema | ||||
|         parsed_chat = tokenizer.parse_response(model_out) | ||||
|         tokenized_out = tokenizer(model_out, return_tensors="pt").input_ids | ||||
|         self.assertEqual(tokenizer.parse_response(tokenized_out), [parsed_chat]) | ||||
|  | ||||
|     def test_cohere_template(self): | ||||
|         model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n    {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>' | ||||
|         parsed_chat = recursive_parse(model_out, cohere_schema) | ||||
| @ -315,7 +281,6 @@ class ChatSchemaParserTest(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             parsed_chat, | ||||
|             { | ||||
|                 "role": "assistant", | ||||
|                 "thinking": 'Okay, the user said, "Hello! How are you?" I need to respond appropriately. Since this is the first message, I should greet them back and ask how I can assist. I should keep it friendly and open-ended. Let me make sure the response is welcoming and encourages them to share what they need help with. I\'ll avoid any technical jargon and keep it simple. Let me check for any typos and ensure the tone is positive.', | ||||
|                 "tool_calls": [ | ||||
|                     { | ||||
| @ -337,10 +302,9 @@ class ChatSchemaParserTest(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             parsed_chat, | ||||
|             { | ||||
|                 "role": "assistant", | ||||
|                 "tool_calls": [ | ||||
|                     {"type": "function", "function": {"name": "get_weather", "arguments": {"city": "Paris"}}} | ||||
|                 ], | ||||
|                 ] | ||||
|             }, | ||||
|         ) | ||||
|  | ||||
| @ -350,7 +314,6 @@ class ChatSchemaParserTest(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             parsed_chat, | ||||
|             { | ||||
|                 "role": "assistant", | ||||
|                 "content": "Some content about gravity goes here but I'm cutting it off to make this shorter!", | ||||
|                 "thinking": 'Okay, the user asked, "Hey! Can you tell me about gravity?" Let me start by breaking down what they might be looking for. They probably want a basic understanding of gravity, maybe for a school project or just personal curiosity. I should explain what gravity is, how it works, and maybe some examples.', | ||||
|             }, | ||||
| @ -362,7 +325,6 @@ class ChatSchemaParserTest(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             parsed_chat, | ||||
|             { | ||||
|                 "role": "assistant", | ||||
|                 "tool_calls": [ | ||||
|                     { | ||||
|                         "type": "function", | ||||
| @ -374,6 +336,6 @@ class ChatSchemaParserTest(unittest.TestCase): | ||||
|                             }, | ||||
|                         }, | ||||
|                     } | ||||
|                 ], | ||||
|                 ] | ||||
|             }, | ||||
|         ) | ||||
| @ -1407,10 +1407,7 @@ if __name__ == "__main__": | ||||
|     if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")): | ||||
|         os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}")) | ||||
|  | ||||
|     nvidia_daily_ci_workflow = ( | ||||
|         "huggingface/transformers/.github/workflows/self-scheduled-caller.yml", | ||||
|         "huggingface/transformers/.github/workflows/self-scheduled-flash-attn-caller.yml", | ||||
|     ) | ||||
|     nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml" | ||||
|     amd_daily_ci_workflows = ( | ||||
|         "huggingface/transformers/.github/workflows/self-scheduled-amd-mi325-caller.yml", | ||||
|         "huggingface/transformers/.github/workflows/self-scheduled-amd-mi355-caller.yml", | ||||
|  | ||||