mirror of
				https://github.com/huggingface/transformers.git
				synced 2025-10-22 02:08:58 +08:00 
			
		
		
		
	Compare commits
	
		
			9 Commits
		
	
	
		
			remove_dat
			...
			fix-deepsp
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 6dc97e6a73 | |||
| 51e600996a | |||
| afe6fe9657 | |||
| a871c0b776 | |||
| 0949bfc0a0 | |||
| 412c317de3 | |||
| 52d632629d | |||
| cfef33b247 | |||
| 1c1905271e | 
| @ -16,9 +16,10 @@ | ||||
| import argparse | ||||
| import copy | ||||
| import os | ||||
| import random | ||||
| from dataclasses import dataclass | ||||
| from typing import Any, Optional | ||||
|  | ||||
| from typing import Any, Dict, List, Optional | ||||
| import glob | ||||
| import yaml | ||||
|  | ||||
|  | ||||
| @ -29,7 +30,6 @@ COMMON_ENV_VARIABLES = { | ||||
|     "RUN_PIPELINE_TESTS": False, | ||||
|     # will be adjusted in `CircleCIJob.to_dict`. | ||||
|     "RUN_FLAKY": True, | ||||
|     "DISABLE_SAFETENSORS_CONVERSION": True, | ||||
| } | ||||
| # Disable the use of {"s": None} as the output is way too long, making navigation on CircleCI impractical | ||||
| COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None} | ||||
| @ -82,15 +82,15 @@ class EmptyJob: | ||||
| @dataclass | ||||
| class CircleCIJob: | ||||
|     name: str | ||||
|     additional_env: dict[str, Any] = None | ||||
|     docker_image: list[dict[str, str]] = None | ||||
|     install_steps: list[str] = None | ||||
|     additional_env: Dict[str, Any] = None | ||||
|     docker_image: List[Dict[str, str]] = None | ||||
|     install_steps: List[str] = None | ||||
|     marker: Optional[str] = None | ||||
|     parallelism: Optional[int] = 0 | ||||
|     pytest_num_workers: int = 8 | ||||
|     pytest_options: dict[str, Any] = None | ||||
|     pytest_options: Dict[str, Any] = None | ||||
|     resource_class: Optional[str] = "xlarge" | ||||
|     tests_to_run: Optional[list[str]] = None | ||||
|     tests_to_run: Optional[List[str]] = None | ||||
|     num_test_files_per_worker: Optional[int] = 10 | ||||
|     # This should be only used for doctest job! | ||||
|     command_timeout: Optional[int] = None | ||||
| @ -109,9 +109,7 @@ class CircleCIJob: | ||||
|                 self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev" | ||||
|             print(f"Using {self.docker_image} docker image") | ||||
|         if self.install_steps is None: | ||||
|             self.install_steps = ["uv pip install ."] | ||||
|         # Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded` | ||||
|         self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh") | ||||
|             self.install_steps = ["uv venv && uv pip install ."] | ||||
|         if self.pytest_options is None: | ||||
|             self.pytest_options = {} | ||||
|         if isinstance(self.tests_to_run, str): | ||||
| @ -130,12 +128,6 @@ class CircleCIJob: | ||||
|  | ||||
|     def to_dict(self): | ||||
|         env = COMMON_ENV_VARIABLES.copy() | ||||
|         if self.job_name != "tests_hub": | ||||
|             # fmt: off | ||||
|             # not critical | ||||
|             env.update({"HF_TOKEN": "".join(["h", "f", "_", "H", "o", "d", "V", "u", "M", "q", "b", "R", "m", "t", "b", "z", "F", "Q", "O", "Q", "A", "J", "G", "D", "l", "V", "Q", "r", "R", "N", "w", "D", "M", "V", "C", "s", "d"])}) | ||||
|             # fmt: on | ||||
|  | ||||
|         # Do not run tests decorated by @is_flaky on pull requests | ||||
|         env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == "" | ||||
|         env.update(self.additional_env) | ||||
| @ -155,7 +147,7 @@ class CircleCIJob: | ||||
|                 # Examples special case: we need to download NLTK files in advance to avoid concurrency issues | ||||
|         timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else "" | ||||
|         marker_cmd = f"-m '{self.marker}'" if self.marker is not None else "" | ||||
|         junit_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml" | ||||
|         junit_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml" | ||||
|         joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS) | ||||
|         repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'" | ||||
|         parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> ' | ||||
| @ -183,33 +175,14 @@ class CircleCIJob: | ||||
|                     "command": f"TESTS=$(circleci tests split  --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt" | ||||
|                     } | ||||
|             }, | ||||
|             # During the CircleCI docker images build time, we might already (or not) download the data. | ||||
|             # If it's done already, the files are inside the directory `/test_data/`. | ||||
|             {"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}}, | ||||
|             {"run": {"name": "download and unzip hub cache", "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/'}}, | ||||
|             {"run": {"name": "fetch hub objects before pytest", "command": "python3 utils/fetch_hub_objects_for_ci.py"}}, | ||||
|             {"run": { | ||||
|                 "name": "Run tests", | ||||
|                 "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"} | ||||
|             }, | ||||
|             {"run": | ||||
|                 { | ||||
|                     "name": "Check for test crashes", | ||||
|                     "when": "always", | ||||
|                     "command": """if [ ! -f tests_output.txt ]; then | ||||
|                             echo "ERROR: tests_output.txt does not exist - tests may not have run properly" | ||||
|                             exit 1 | ||||
|                         elif grep -q "crashed and worker restarting disabled" tests_output.txt; then | ||||
|                             echo "ERROR: Worker crash detected in test output" | ||||
|                             echo "Found: crashed and worker restarting disabled" | ||||
|                             exit 1 | ||||
|                         else | ||||
|                             echo "Tests output file exists and no worker crashes detected" | ||||
|                         fi""" | ||||
|                 }, | ||||
|             }, | ||||
|             {"run": {"name": "Expand to show skipped tests", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}, | ||||
|             {"run": {"name": "Failed tests: show reasons",   "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}}, | ||||
|             {"run": {"name": "Errors",                       "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}}, | ||||
|             {"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}, | ||||
|             {"run": {"name": "Failed tests: show reasons",   "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}}, | ||||
|             {"run": {"name": "Errors",                       "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}}, | ||||
|             {"store_test_results": {"path": "test-results"}}, | ||||
|             {"store_artifacts": {"path": "test-results/junit.xml"}}, | ||||
|             {"store_artifacts": {"path": "reports"}}, | ||||
| @ -240,7 +213,7 @@ generate_job = CircleCIJob( | ||||
|     docker_image=[{"image": "huggingface/transformers-torch-light"}], | ||||
|     # networkx==3.3 (after #36957) cause some issues | ||||
|     # TODO: remove this once it works directly | ||||
|     install_steps=["uv pip install ."], | ||||
|     install_steps=["uv venv && uv pip install ."], | ||||
|     marker="generate", | ||||
|     parallelism=6, | ||||
| ) | ||||
| @ -271,12 +244,13 @@ custom_tokenizers_job = CircleCIJob( | ||||
|     docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}], | ||||
| ) | ||||
|  | ||||
|  | ||||
| examples_torch_job = CircleCIJob( | ||||
|     "examples_torch", | ||||
|     additional_env={"OMP_NUM_THREADS": 8}, | ||||
|     docker_image=[{"image":"huggingface/transformers-examples-torch"}], | ||||
|     # TODO @ArthurZucker remove this once docker is easier to build | ||||
|     install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"], | ||||
|     install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"], | ||||
|     pytest_num_workers=4, | ||||
| ) | ||||
|  | ||||
| @ -285,7 +259,7 @@ hub_job = CircleCIJob( | ||||
|     additional_env={"HUGGINGFACE_CO_STAGING": True}, | ||||
|     docker_image=[{"image":"huggingface/transformers-torch-light"}], | ||||
|     install_steps=[ | ||||
|         'uv pip install .', | ||||
|         'uv venv && uv pip install .', | ||||
|         'git config --global user.email "ci@dummy.com"', | ||||
|         'git config --global user.name "ci"', | ||||
|     ], | ||||
| @ -294,6 +268,20 @@ hub_job = CircleCIJob( | ||||
|     resource_class="medium", | ||||
| ) | ||||
|  | ||||
|  | ||||
| onnx_job = CircleCIJob( | ||||
|     "onnx", | ||||
|     docker_image=[{"image":"huggingface/transformers-torch-tf-light"}], | ||||
|     install_steps=[ | ||||
|         "uv venv", | ||||
|         "uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]", | ||||
|     ], | ||||
|     pytest_options={"k onnx": None}, | ||||
|     pytest_num_workers=1, | ||||
|     resource_class="small", | ||||
| ) | ||||
|  | ||||
|  | ||||
| exotic_models_job = CircleCIJob( | ||||
|     "exotic_models", | ||||
|     docker_image=[{"image":"huggingface/transformers-exotic-models"}], | ||||
| @ -301,6 +289,7 @@ exotic_models_job = CircleCIJob( | ||||
|     pytest_options={"durations": 100}, | ||||
| ) | ||||
|  | ||||
|  | ||||
| repo_utils_job = CircleCIJob( | ||||
|     "repo_utils", | ||||
|     docker_image=[{"image":"huggingface/transformers-consistency"}], | ||||
| @ -308,12 +297,13 @@ repo_utils_job = CircleCIJob( | ||||
|     resource_class="large", | ||||
| ) | ||||
|  | ||||
|  | ||||
| non_model_job = CircleCIJob( | ||||
|     "non_model", | ||||
|     docker_image=[{"image": "huggingface/transformers-torch-light"}], | ||||
|     # networkx==3.3 (after #36957) cause some issues | ||||
|     # TODO: remove this once it works directly | ||||
|     install_steps=["uv pip install .[serving]"], | ||||
|     install_steps=["uv venv && uv pip install ."], | ||||
|     marker="not generate", | ||||
|     parallelism=6, | ||||
| ) | ||||
| @ -331,7 +321,7 @@ doc_test_job = CircleCIJob( | ||||
|     additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"}, | ||||
|     install_steps=[ | ||||
|         # Add an empty file to keep the test step running correctly even if no file is selected to be tested. | ||||
|         "uv pip install .", | ||||
|         "uv venv && pip install .", | ||||
|         "touch dummy.py", | ||||
|         command, | ||||
|         "cat pr_documentation_tests_temp.txt", | ||||
| @ -343,7 +333,7 @@ doc_test_job = CircleCIJob( | ||||
|     pytest_num_workers=1, | ||||
| ) | ||||
|  | ||||
| REGULAR_TESTS = [torch_job, hub_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip | ||||
| REGULAR_TESTS = [torch_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip | ||||
| EXAMPLES_TESTS = [examples_torch_job] | ||||
| PIPELINE_TESTS = [pipelines_torch_job] | ||||
| REPO_UTIL_TESTS = [repo_utils_job] | ||||
|  | ||||
| @ -1,6 +1,5 @@ | ||||
| import argparse | ||||
| import re | ||||
|  | ||||
| import argparse | ||||
|  | ||||
| def parse_pytest_output(file_path): | ||||
|     skipped_tests = {} | ||||
|  | ||||
							
								
								
									
										29
									
								
								.github/ISSUE_TEMPLATE/bug-report.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										29
									
								
								.github/ISSUE_TEMPLATE/bug-report.yml
									
									
									
									
										vendored
									
									
								
							| @ -36,23 +36,19 @@ body: | ||||
|  | ||||
|         Models: | ||||
|  | ||||
|           - text models: @ArthurZucker @Cyrilvallez | ||||
|           - vision models: @yonigozlan @molbap | ||||
|           - audio models: @eustlb @ebezzam @vasqu | ||||
|           - multimodal models: @zucchini-nlp | ||||
|           - text models: @ArthurZucker | ||||
|           - vision models: @amyeroberts, @qubvel | ||||
|           - speech models: @eustlb | ||||
|           - graph models: @clefourrier | ||||
|  | ||||
|         Library: | ||||
|  | ||||
|           - flax: @gante and @Rocketknight1 | ||||
|           - generate: @zucchini-nlp (visual-language models) or @gante (all others) | ||||
|           - continuous batching: @remi-or @ArthurZucker @McPatate | ||||
|           - pipelines: @Rocketknight1 | ||||
|           - tensorflow: @gante and @Rocketknight1 | ||||
|           - tokenizers: @ArthurZucker and @itazap | ||||
|           - trainer: @zach-huggingface @SunMarc | ||||
|           - attention: @vasqu @ArthurZucker @CyrilVallez | ||||
|           - model loading (from pretrained, etc): @CyrilVallez | ||||
|           - distributed: @3outeille @ArthurZucker @S1ro1 | ||||
|           - CIs: @ydshieh | ||||
|  | ||||
|         Integrations: | ||||
|  | ||||
| @ -60,8 +56,6 @@ body: | ||||
|           - ray/raytune: @richardliaw, @amogkam | ||||
|           - Big Model Inference: @SunMarc | ||||
|           - quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber | ||||
|           - kernels: @MekkCyber @drbh | ||||
|           - peft: @BenjaminBossan @githubnemo | ||||
|          | ||||
|         Devices/Backends: | ||||
|          | ||||
| @ -75,6 +69,19 @@ body: | ||||
|  | ||||
|           - for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator. | ||||
|  | ||||
|         HF projects: | ||||
|  | ||||
|           - accelerate: [different repo](https://github.com/huggingface/accelerate) | ||||
|           - datasets: [different repo](https://github.com/huggingface/datasets) | ||||
|           - diffusers: [different repo](https://github.com/huggingface/diffusers) | ||||
|           - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers) | ||||
|  | ||||
|         Maintained examples (not research project or legacy): | ||||
|  | ||||
|           - Flax: @Rocketknight1 | ||||
|           - PyTorch: See Models above and tag the person corresponding to the modality of the example. | ||||
|           - TensorFlow: @Rocketknight1 | ||||
|  | ||||
|         Research projects are not maintained and should be taken as is. | ||||
|  | ||||
|       placeholder: "@Username ..." | ||||
|  | ||||
							
								
								
									
										40
									
								
								.github/PULL_REQUEST_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										40
									
								
								.github/PULL_REQUEST_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							| @ -39,23 +39,20 @@ members/contributors who may be interested in your PR. | ||||
|  | ||||
| Models: | ||||
|  | ||||
| - text models: @ArthurZucker @Cyrilvallez | ||||
| - vision models: @yonigozlan @molbap | ||||
| - audio models: @eustlb @ebezzam @vasqu | ||||
| - multimodal models: @zucchini-nlp | ||||
| - text models: @ArthurZucker | ||||
| - vision models: @amyeroberts, @qubvel | ||||
| - speech models: @eustlb | ||||
| - graph models: @clefourrier | ||||
|  | ||||
| Library: | ||||
|  | ||||
| - flax: @gante and @Rocketknight1 | ||||
| - generate: @zucchini-nlp (visual-language models) or @gante (all others) | ||||
| - continuous batching: @remi-or @ArthurZucker @McPatate | ||||
| - pipelines: @Rocketknight1 | ||||
| - tokenizers: @ArthurZucker and @itazap | ||||
| - trainer: @zach-huggingface @SunMarc | ||||
| - attention: @vasqu @ArthurZucker @CyrilVallez | ||||
| - model loading (from pretrained, etc): @CyrilVallez | ||||
| - distributed: @3outeille @ArthurZucker @S1ro1 | ||||
| - CIs: @ydshieh | ||||
| - tensorflow: @gante and @Rocketknight1 | ||||
| - tokenizers: @ArthurZucker | ||||
| - trainer: @zach-huggingface, @SunMarc and @qgallouedec | ||||
| - chat templates: @Rocketknight1 | ||||
|  | ||||
| Integrations: | ||||
|  | ||||
| @ -63,17 +60,20 @@ Integrations: | ||||
| - ray/raytune: @richardliaw, @amogkam | ||||
| - Big Model Inference: @SunMarc | ||||
| - quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber | ||||
| - kernels: @MekkCyber @drbh | ||||
| - peft: @BenjaminBossan @githubnemo | ||||
|  | ||||
| Devices/Backends: | ||||
|  | ||||
| - AMD ROCm: @ivarflakstad | ||||
| - Intel XPU: @IlyasMoutawwakil | ||||
| - Ascend NPU: @ivarflakstad  | ||||
|  | ||||
| Documentation: @stevhliu | ||||
|  | ||||
| Research projects are not maintained and should be taken as is. | ||||
| HF projects: | ||||
|  | ||||
| - accelerate: [different repo](https://github.com/huggingface/accelerate) | ||||
| - datasets: [different repo](https://github.com/huggingface/datasets) | ||||
| - diffusers: [different repo](https://github.com/huggingface/diffusers) | ||||
| - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers) | ||||
|  | ||||
| Maintained examples (not research project or legacy): | ||||
|  | ||||
| - Flax: @Rocketknight1 | ||||
| - PyTorch: See Models above and tag the person corresponding to the modality of the example. | ||||
| - TensorFlow: @Rocketknight1 | ||||
|  | ||||
|  --> | ||||
|  | ||||
							
								
								
									
										39
									
								
								.github/copilot-instructions.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										39
									
								
								.github/copilot-instructions.md
									
									
									
									
										vendored
									
									
								
							| @ -1,39 +0,0 @@ | ||||
| # copilot-instructions.md Guide for Hugging Face Transformers | ||||
|  | ||||
| This copilot-instructions.md file provides guidance for code agents working with this codebase. | ||||
|  | ||||
| ## Core Project Structure | ||||
|  | ||||
| - `/src/transformers`: This contains the core source code for the library | ||||
|   - `/models`: Code for individual models. Models inherit from base classes in the root `/src/transformers` directory. | ||||
| - `/tests`: This contains the core test classes for the library. These are usually inherited rather than directly run. | ||||
|   - `/models`: Tests for individual models. Model tests inherit from common tests in the root `/tests` directory. | ||||
| - `/docs`: This contains the documentation for the library, including guides, tutorials, and API references. | ||||
|  | ||||
| ## Coding Conventions for Hugging Face Transformers | ||||
|  | ||||
| - PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff. | ||||
| - When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model. | ||||
| - Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code. | ||||
|  | ||||
| ## Copying and inheritance | ||||
|  | ||||
| Many models in the codebase have similar code, but it is not shared by inheritance because we want each model file to be self-contained. | ||||
| We use two mechanisms to keep this code in sync: | ||||
|  | ||||
| - "Copied from" syntax. Functions or entire classes can have a comment at the top like this: `# Copied from transformers.models.llama.modeling_llama.rotate_half` or `# Copied from transformers.models.t5.modeling_t5.T5LayerNorm with T5->MT5` | ||||
|   These comments are actively checked by the style tools, and copies will automatically be updated when the base code is updated. If you need to update a copied function, you should | ||||
|   either update the base function and use `make fixup` to propagate the change to all copies, or simply remove the `# Copied from` comment if that is inappropriate. | ||||
| - "Modular" files. These files briefly define models by composing them using inheritance from other models. They are not meant to be used directly. Instead, the style tools | ||||
|   automatically generate a complete modeling file, like `modeling_bert.py`, from the modular file like `modular_bert.py`. If a model has a modular file, the modeling file | ||||
|   should never be edited directly! Instead, changes should be made in the modular file, and then you should run `make fixup` to update the modeling file automatically. | ||||
|  | ||||
| When adding new models, you should prefer `modular` style and inherit as many classes as possible from existing models. | ||||
|  | ||||
| ## Testing | ||||
|  | ||||
| After making changes, you should usually run `make fixup` to ensure any copies and modular files are updated, and then test all affected models. This includes both | ||||
| the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`. | ||||
| If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`. | ||||
|  | ||||
| In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them. | ||||
							
								
								
									
										8
									
								
								.github/scripts/assign_reviewers.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/scripts/assign_reviewers.py
									
									
									
									
										vendored
									
									
								
							| @ -13,16 +13,14 @@ | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
|  | ||||
| import json | ||||
| import os | ||||
| import github | ||||
| import json | ||||
| from github import Github | ||||
| import re | ||||
| from collections import Counter | ||||
| from pathlib import Path | ||||
|  | ||||
| import github | ||||
| from github import Github | ||||
|  | ||||
|  | ||||
| def pattern_to_regex(pattern): | ||||
|     if pattern.startswith("/"): | ||||
|         start_anchor = True | ||||
|  | ||||
							
								
								
									
										130
									
								
								.github/scripts/codeowners_for_review_action
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										130
									
								
								.github/scripts/codeowners_for_review_action
									
									
									
									
										vendored
									
									
								
							| @ -7,8 +7,8 @@ docs/ @stevhliu | ||||
| /docker/ @ydshieh @ArthurZucker | ||||
|  | ||||
| # More high-level globs catch cases when specific rules later don't apply | ||||
| /src/transformers/models/*/processing* @molbap @yonigozlan | ||||
| /src/transformers/models/*/image_processing* @yonigozlan | ||||
| /src/transformers/models/*/processing* @molbap @yonigozlan @qubvel | ||||
| /src/transformers/models/*/image_processing* @qubvel | ||||
| /src/transformers/models/*/image_processing_*_fast* @yonigozlan | ||||
|  | ||||
| # Owners of subsections of the library | ||||
| @ -186,65 +186,65 @@ trainer_utils.py @zach-huggingface @SunMarc | ||||
| /src/transformers/models/zamba/mod*_zamba* @ArthurZucker | ||||
|  | ||||
| # Vision models | ||||
| /src/transformers/models/beit/mod*_beit* @yonigozlan @molbap | ||||
| /src/transformers/models/bit/mod*_bit* @yonigozlan @molbap | ||||
| /src/transformers/models/conditional_detr/mod*_conditional_detr* @yonigozlan @molbap | ||||
| /src/transformers/models/convnext/mod*_convnext* @yonigozlan @molbap | ||||
| /src/transformers/models/convnextv2/mod*_convnextv2* @yonigozlan @molbap | ||||
| /src/transformers/models/cvt/mod*_cvt* @yonigozlan @molbap | ||||
| /src/transformers/models/deformable_detr/mod*_deformable_detr* @yonigozlan @molbap | ||||
| /src/transformers/models/deit/mod*_deit* @yonigozlan @molbap | ||||
| /src/transformers/models/depth_anything/mod*_depth_anything* @yonigozlan @molbap | ||||
| /src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @yonigozlan @molbap | ||||
| /src/transformers/models/deta/mod*_deta* @yonigozlan @molbap | ||||
| /src/transformers/models/detr/mod*_detr* @yonigozlan @molbap | ||||
| /src/transformers/models/dinat/mod*_dinat* @yonigozlan @molbap | ||||
| /src/transformers/models/dinov2/mod*_dinov2* @yonigozlan @molbap | ||||
| /src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @yonigozlan @molbap | ||||
| /src/transformers/models/dit/mod*_dit* @yonigozlan @molbap | ||||
| /src/transformers/models/dpt/mod*_dpt* @yonigozlan @molbap | ||||
| /src/transformers/models/efficientformer/mod*_efficientformer* @yonigozlan @molbap | ||||
| /src/transformers/models/efficientnet/mod*_efficientnet* @yonigozlan @molbap | ||||
| /src/transformers/models/focalnet/mod*_focalnet* @yonigozlan @molbap | ||||
| /src/transformers/models/glpn/mod*_glpn* @yonigozlan @molbap | ||||
| /src/transformers/models/hiera/mod*_hiera* @yonigozlan @molbap | ||||
| /src/transformers/models/ijepa/mod*_ijepa* @yonigozlan @molbap | ||||
| /src/transformers/models/imagegpt/mod*_imagegpt* @yonigozlan @molbap | ||||
| /src/transformers/models/levit/mod*_levit* @yonigozlan @molbap | ||||
| /src/transformers/models/mask2former/mod*_mask2former* @yonigozlan @molbap | ||||
| /src/transformers/models/maskformer/mod*_maskformer* @yonigozlan @molbap | ||||
| /src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @yonigozlan @molbap | ||||
| /src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @yonigozlan @molbap | ||||
| /src/transformers/models/mobilevit/mod*_mobilevit* @yonigozlan @molbap | ||||
| /src/transformers/models/mobilevitv2/mod*_mobilevitv2* @yonigozlan @molbap | ||||
| /src/transformers/models/nat/mod*_nat* @yonigozlan @molbap | ||||
| /src/transformers/models/poolformer/mod*_poolformer* @yonigozlan @molbap | ||||
| /src/transformers/models/pvt/mod*_pvt* @yonigozlan @molbap | ||||
| /src/transformers/models/pvt_v2/mod*_pvt_v2* @yonigozlan @molbap | ||||
| /src/transformers/models/regnet/mod*_regnet* @yonigozlan @molbap | ||||
| /src/transformers/models/resnet/mod*_resnet* @yonigozlan @molbap | ||||
| /src/transformers/models/rt_detr/mod*_rt_detr* @yonigozlan @molbap | ||||
| /src/transformers/models/segformer/mod*_segformer* @yonigozlan @molbap | ||||
| /src/transformers/models/seggpt/mod*_seggpt* @yonigozlan @molbap | ||||
| /src/transformers/models/superpoint/mod*_superpoint* @yonigozlan @molbap | ||||
| /src/transformers/models/swiftformer/mod*_swiftformer* @yonigozlan @molbap | ||||
| /src/transformers/models/swin/mod*_swin* @yonigozlan @molbap | ||||
| /src/transformers/models/swinv2/mod*_swinv2* @yonigozlan @molbap | ||||
| /src/transformers/models/swin2sr/mod*_swin2sr* @yonigozlan @molbap | ||||
| /src/transformers/models/table_transformer/mod*_table_transformer* @yonigozlan @molbap | ||||
| /src/transformers/models/textnet/mod*_textnet* @yonigozlan @molbap | ||||
| /src/transformers/models/timm_wrapper/mod*_timm_wrapper* @yonigozlan @molbap | ||||
| /src/transformers/models/upernet/mod*_upernet* @yonigozlan @molbap | ||||
| /src/transformers/models/van/mod*_van* @yonigozlan @molbap | ||||
| /src/transformers/models/vit/mod*_vit* @yonigozlan @molbap | ||||
| /src/transformers/models/vit_hybrid/mod*_vit_hybrid* @yonigozlan @molbap | ||||
| /src/transformers/models/vitdet/mod*_vitdet* @yonigozlan @molbap | ||||
| /src/transformers/models/vit_mae/mod*_vit_mae* @yonigozlan @molbap | ||||
| /src/transformers/models/vitmatte/mod*_vitmatte* @yonigozlan @molbap | ||||
| /src/transformers/models/vit_msn/mod*_vit_msn* @yonigozlan @molbap | ||||
| /src/transformers/models/vitpose/mod*_vitpose* @yonigozlan @molbap | ||||
| /src/transformers/models/yolos/mod*_yolos* @yonigozlan @molbap | ||||
| /src/transformers/models/zoedepth/mod*_zoedepth* @yonigozlan @molbap | ||||
| /src/transformers/models/beit/mod*_beit* @amyeroberts @qubvel | ||||
| /src/transformers/models/bit/mod*_bit* @amyeroberts @qubvel | ||||
| /src/transformers/models/conditional_detr/mod*_conditional_detr* @amyeroberts @qubvel | ||||
| /src/transformers/models/convnext/mod*_convnext* @amyeroberts @qubvel | ||||
| /src/transformers/models/convnextv2/mod*_convnextv2* @amyeroberts @qubvel | ||||
| /src/transformers/models/cvt/mod*_cvt* @amyeroberts @qubvel | ||||
| /src/transformers/models/deformable_detr/mod*_deformable_detr* @amyeroberts @qubvel | ||||
| /src/transformers/models/deit/mod*_deit* @amyeroberts @qubvel | ||||
| /src/transformers/models/depth_anything/mod*_depth_anything* @amyeroberts @qubvel | ||||
| /src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @amyeroberts @qubvel | ||||
| /src/transformers/models/deta/mod*_deta* @amyeroberts @qubvel | ||||
| /src/transformers/models/detr/mod*_detr* @amyeroberts @qubvel | ||||
| /src/transformers/models/dinat/mod*_dinat* @amyeroberts @qubvel | ||||
| /src/transformers/models/dinov2/mod*_dinov2* @amyeroberts @qubvel | ||||
| /src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @amyeroberts @qubvel | ||||
| /src/transformers/models/dit/mod*_dit* @amyeroberts @qubvel | ||||
| /src/transformers/models/dpt/mod*_dpt* @amyeroberts @qubvel | ||||
| /src/transformers/models/efficientformer/mod*_efficientformer* @amyeroberts @qubvel | ||||
| /src/transformers/models/efficientnet/mod*_efficientnet* @amyeroberts @qubvel | ||||
| /src/transformers/models/focalnet/mod*_focalnet* @amyeroberts @qubvel | ||||
| /src/transformers/models/glpn/mod*_glpn* @amyeroberts @qubvel | ||||
| /src/transformers/models/hiera/mod*_hiera* @amyeroberts @qubvel | ||||
| /src/transformers/models/ijepa/mod*_ijepa* @amyeroberts @qubvel | ||||
| /src/transformers/models/imagegpt/mod*_imagegpt* @amyeroberts @qubvel | ||||
| /src/transformers/models/levit/mod*_levit* @amyeroberts @qubvel | ||||
| /src/transformers/models/mask2former/mod*_mask2former* @amyeroberts @qubvel | ||||
| /src/transformers/models/maskformer/mod*_maskformer* @amyeroberts @qubvel | ||||
| /src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @amyeroberts @qubvel | ||||
| /src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @amyeroberts @qubvel | ||||
| /src/transformers/models/mobilevit/mod*_mobilevit* @amyeroberts @qubvel | ||||
| /src/transformers/models/mobilevitv2/mod*_mobilevitv2* @amyeroberts @qubvel | ||||
| /src/transformers/models/nat/mod*_nat* @amyeroberts @qubvel | ||||
| /src/transformers/models/poolformer/mod*_poolformer* @amyeroberts @qubvel | ||||
| /src/transformers/models/pvt/mod*_pvt* @amyeroberts @qubvel | ||||
| /src/transformers/models/pvt_v2/mod*_pvt_v2* @amyeroberts @qubvel | ||||
| /src/transformers/models/regnet/mod*_regnet* @amyeroberts @qubvel | ||||
| /src/transformers/models/resnet/mod*_resnet* @amyeroberts @qubvel | ||||
| /src/transformers/models/rt_detr/mod*_rt_detr* @amyeroberts @qubvel | ||||
| /src/transformers/models/segformer/mod*_segformer* @amyeroberts @qubvel | ||||
| /src/transformers/models/seggpt/mod*_seggpt* @amyeroberts @qubvel | ||||
| /src/transformers/models/superpoint/mod*_superpoint* @amyeroberts @qubvel | ||||
| /src/transformers/models/swiftformer/mod*_swiftformer* @amyeroberts @qubvel | ||||
| /src/transformers/models/swin/mod*_swin* @amyeroberts @qubvel | ||||
| /src/transformers/models/swinv2/mod*_swinv2* @amyeroberts @qubvel | ||||
| /src/transformers/models/swin2sr/mod*_swin2sr* @amyeroberts @qubvel | ||||
| /src/transformers/models/table_transformer/mod*_table_transformer* @amyeroberts @qubvel | ||||
| /src/transformers/models/textnet/mod*_textnet* @amyeroberts @qubvel | ||||
| /src/transformers/models/timm_wrapper/mod*_timm_wrapper* @amyeroberts @qubvel | ||||
| /src/transformers/models/upernet/mod*_upernet* @amyeroberts @qubvel | ||||
| /src/transformers/models/van/mod*_van* @amyeroberts @qubvel | ||||
| /src/transformers/models/vit/mod*_vit* @amyeroberts @qubvel | ||||
| /src/transformers/models/vit_hybrid/mod*_vit_hybrid* @amyeroberts @qubvel | ||||
| /src/transformers/models/vitdet/mod*_vitdet* @amyeroberts @qubvel | ||||
| /src/transformers/models/vit_mae/mod*_vit_mae* @amyeroberts @qubvel | ||||
| /src/transformers/models/vitmatte/mod*_vitmatte* @amyeroberts @qubvel | ||||
| /src/transformers/models/vit_msn/mod*_vit_msn* @amyeroberts @qubvel | ||||
| /src/transformers/models/vitpose/mod*_vitpose* @amyeroberts @qubvel | ||||
| /src/transformers/models/yolos/mod*_yolos* @amyeroberts @qubvel | ||||
| /src/transformers/models/zoedepth/mod*_zoedepth* @amyeroberts @qubvel | ||||
|  | ||||
| # Audio models | ||||
| /src/transformers/models/audio_spectrogram_transformer/mod*_audio_spectrogram_transformer* @eustlb | ||||
| @ -304,7 +304,7 @@ trainer_utils.py @zach-huggingface @SunMarc | ||||
| /src/transformers/models/donut/mod*_donut* @zucchini-nlp | ||||
| /src/transformers/models/flava/mod*_flava* @zucchini-nlp | ||||
| /src/transformers/models/git/mod*_git* @zucchini-nlp | ||||
| /src/transformers/models/grounding_dino/mod*_grounding_dino* @yonigozlan | ||||
| /src/transformers/models/grounding_dino/mod*_grounding_dino* @qubvel | ||||
| /src/transformers/models/groupvit/mod*_groupvit* @zucchini-nlp | ||||
| /src/transformers/models/idefics/mod*_idefics* @zucchini-nlp | ||||
| /src/transformers/models/idefics2/mod*_idefics2* @zucchini-nlp | ||||
| @ -326,10 +326,10 @@ trainer_utils.py @zach-huggingface @SunMarc | ||||
| /src/transformers/models/mgp_str/mod*_mgp_str* @zucchini-nlp | ||||
| /src/transformers/models/mllama/mod*_mllama* @zucchini-nlp | ||||
| /src/transformers/models/nougat/mod*_nougat* @NielsRogge | ||||
| /src/transformers/models/omdet_turbo/mod*_omdet_turbo* @yonigozlan | ||||
| /src/transformers/models/omdet_turbo/mod*_omdet_turbo* @qubvel @yonigozlan | ||||
| /src/transformers/models/oneformer/mod*_oneformer* @zucchini-nlp | ||||
| /src/transformers/models/owlvit/mod*_owlvit* @yonigozlan | ||||
| /src/transformers/models/owlv2/mod*_owlv2* @yonigozlan | ||||
| /src/transformers/models/owlvit/mod*_owlvit* @qubvel | ||||
| /src/transformers/models/owlv2/mod*_owlv2* @qubvel | ||||
| /src/transformers/models/paligemma/mod*_paligemma* @zucchini-nlp @molbap | ||||
| /src/transformers/models/perceiver/mod*_perceiver* @zucchini-nlp | ||||
| /src/transformers/models/pix2struct/mod*_pix2struct* @zucchini-nlp | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/benchmark.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/benchmark.yml
									
									
									
									
										vendored
									
									
								
							| @ -48,7 +48,7 @@ jobs: | ||||
|  | ||||
|       - name: Run database init script | ||||
|         run: | | ||||
|           psql -f benchmark/utils/init_db.sql | ||||
|           psql -f benchmark/init_db.sql | ||||
|         env: | ||||
|           PGDATABASE: metrics | ||||
|           PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }} | ||||
|  | ||||
							
								
								
									
										85
									
								
								.github/workflows/benchmark_v2.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										85
									
								
								.github/workflows/benchmark_v2.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,85 +0,0 @@ | ||||
| name: Benchmark v2 Framework | ||||
|  | ||||
| on: | ||||
|   workflow_call: | ||||
|     inputs: | ||||
|       runner: | ||||
|         description: 'GH Actions runner group to use' | ||||
|         required: true | ||||
|         type: string | ||||
|       container_image: | ||||
|         description: 'Docker image to use' | ||||
|         required: true | ||||
|         type: string | ||||
|       container_options: | ||||
|         description: 'Container options to use' | ||||
|         required: true | ||||
|         type: string | ||||
|       commit_sha: | ||||
|         description: 'Commit SHA to benchmark' | ||||
|         required: false | ||||
|         type: string | ||||
|         default: '' | ||||
|       run_id: | ||||
|         description: 'Custom run ID for organizing results (auto-generated if not provided)' | ||||
|         required: false | ||||
|         type: string | ||||
|         default: '' | ||||
|       benchmark_repo_id: | ||||
|         description: 'HuggingFace Dataset to upload results to (e.g., "org/benchmark-results")' | ||||
|         required: false | ||||
|         type: string | ||||
|         default: '' | ||||
|  | ||||
| env: | ||||
|   HF_HOME: /mnt/cache | ||||
|   TRANSFORMERS_IS_CI: yes | ||||
|   # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access. | ||||
|   # This token is created under the bot `hf-transformers-bot`. | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|  | ||||
| jobs: | ||||
|   benchmark-v2: | ||||
|     name: Benchmark v2 | ||||
|     runs-on: ${{ inputs.runner }} | ||||
|     if: | | ||||
|       (github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark')) || | ||||
|       (github.event_name == 'schedule') | ||||
|     container: | ||||
|       image: ${{ inputs.container_image }} | ||||
|       options: ${{ inputs.container_options }} | ||||
|     steps: | ||||
|       - name: Get repo | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ inputs.commit_sha || github.sha }} | ||||
|  | ||||
|       - name: Install benchmark dependencies | ||||
|         run: | | ||||
|           python3 -m pip install -r benchmark_v2/requirements.txt | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode | ||||
|         run: | | ||||
|           python3 -m pip uninstall -y transformers | ||||
|           python3 -m pip install -e ".[torch]" | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         run: | | ||||
|           python3 -m pip list | ||||
|           python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')" | ||||
|           python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')" | ||||
|           python3 -c "import torch; print(f'CUDA device count: {torch.cuda.device_count()}')" || true | ||||
|           nvidia-smi || true | ||||
|  | ||||
|       - name: Run benchmark v2 | ||||
|         working-directory: benchmark_v2 | ||||
|         run: | | ||||
|           echo "Running benchmarks" | ||||
|           python3 run_benchmarks.py \ | ||||
|           --commit-id '${{ inputs.commit_sha || github.sha }}' \ | ||||
|           --run-id '${{ inputs.run_id }}' \ | ||||
|           --push-to-hub '${{ inputs.benchmark_repo_id}}' \ | ||||
|           --token '${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}' \ | ||||
|           --log-level INFO | ||||
|         env: | ||||
|           HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
							
								
								
									
										21
									
								
								.github/workflows/benchmark_v2_a10_caller.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										21
									
								
								.github/workflows/benchmark_v2_a10_caller.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,21 +0,0 @@ | ||||
| name: Benchmark v2 Scheduled Runner - A10 Single-GPU | ||||
|  | ||||
| on: | ||||
|   schedule: | ||||
|     # Run daily at 16:30 UTC | ||||
|     - cron: "30 16 * * *" | ||||
|   pull_request: | ||||
|     types: [ opened, labeled, reopened, synchronize ] | ||||
|  | ||||
| jobs: | ||||
|   benchmark-v2-default: | ||||
|     name: Benchmark v2 - Default Models | ||||
|     uses: ./.github/workflows/benchmark_v2.yml | ||||
|     with: | ||||
|       runner: aws-g5-4xlarge-cache-use1-public-80 | ||||
|       container_image: huggingface/transformers-pytorch-gpu | ||||
|       container_options: --gpus all --privileged --ipc host --shm-size "16gb" | ||||
|       commit_sha: ${{ github.sha }} | ||||
|       run_id: ${{ github.run_id }} | ||||
|       benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks | ||||
|     secrets: inherit | ||||
							
								
								
									
										21
									
								
								.github/workflows/benchmark_v2_mi325_caller.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										21
									
								
								.github/workflows/benchmark_v2_mi325_caller.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,21 +0,0 @@ | ||||
| name: Benchmark v2 Scheduled Runner - MI325 Single-GPU | ||||
|  | ||||
| on: | ||||
|   schedule: | ||||
|     # Run daily at 16:30 UTC | ||||
|     - cron: "30 16 * * *" | ||||
|   pull_request: | ||||
|     types: [ opened, labeled, reopened, synchronize ] | ||||
|  | ||||
| jobs: | ||||
|   benchmark-v2-default: | ||||
|     name: Benchmark v2 - Default Models | ||||
|     uses: ./.github/workflows/benchmark_v2.yml | ||||
|     with: | ||||
|       runner: amd-mi325-ci-1gpu | ||||
|       container_image: huggingface/transformers-pytorch-amd-gpu | ||||
|       container_options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache | ||||
|       commit_sha: ${{ github.sha }} | ||||
|       run_id: ${{ github.run_id }} | ||||
|       benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks | ||||
|     secrets: inherit | ||||
							
								
								
									
										2
									
								
								.github/workflows/build-ci-docker-images.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/build-ci-docker-images.yml
									
									
									
									
										vendored
									
									
								
							| @ -26,7 +26,7 @@ jobs: | ||||
|  | ||||
|     strategy: | ||||
|       matrix: | ||||
|         file: ["quality", "consistency", "custom-tokenizers", "torch-light", "exotic-models", "examples-torch"] | ||||
|         file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "jax-light", "examples-torch",  "examples-tf"] | ||||
|     continue-on-error: true | ||||
|  | ||||
|     steps: | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/workflows/build-docker-images.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/workflows/build-docker-images.yml
									
									
									
									
										vendored
									
									
								
							| @ -5,7 +5,6 @@ on: | ||||
|     branches: | ||||
|       - build_ci_docker_image* | ||||
|   repository_dispatch: | ||||
|   workflow_dispatch: | ||||
|   workflow_call: | ||||
|     inputs: | ||||
|       image_postfix: | ||||
| @ -222,7 +221,7 @@ jobs: | ||||
|   latest-pytorch-amd: | ||||
|     name: "Latest PyTorch (AMD) [dev]" | ||||
|     runs-on: | ||||
|       group: aws-highcpu-32-priv | ||||
|       group: aws-general-8-plus | ||||
|     steps: | ||||
|       - | ||||
|         name: Set up Docker Buildx | ||||
|  | ||||
| @ -2,10 +2,6 @@ name: Build docker images (Nightly CI) | ||||
|  | ||||
| on: | ||||
|   workflow_call: | ||||
|     inputs: | ||||
|       job: | ||||
|         required: true | ||||
|         type: string | ||||
|   push: | ||||
|     branches: | ||||
|       - build_nightly_ci_docker_image* | ||||
| @ -16,8 +12,7 @@ concurrency: | ||||
|  | ||||
| jobs: | ||||
|   latest-with-torch-nightly-docker: | ||||
|     name: "Nightly PyTorch" | ||||
|     if: inputs.job == 'latest-with-torch-nightly-docker' || inputs.job == '' | ||||
|     name: "Nightly PyTorch + Stable TensorFlow" | ||||
|     runs-on: | ||||
|       group: aws-general-8-plus | ||||
|     steps: | ||||
| @ -46,7 +41,6 @@ jobs: | ||||
|  | ||||
|   nightly-torch-deepspeed-docker: | ||||
|     name: "Nightly PyTorch + DeepSpeed" | ||||
|     if: inputs.job == 'nightly-torch-deepspeed-docker' || inputs.job == '' | ||||
|     runs-on: | ||||
|       group: aws-g4dn-2xlarge-cache | ||||
|     steps: | ||||
|  | ||||
							
								
								
									
										14
									
								
								.github/workflows/build_documentation.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										14
									
								
								.github/workflows/build_documentation.yml
									
									
									
									
										vendored
									
									
								
							| @ -16,20 +16,8 @@ jobs: | ||||
|       commit_sha: ${{ github.sha }} | ||||
|       package: transformers | ||||
|       notebook_folder: transformers_doc | ||||
|       languages: en | ||||
|       languages: ar de en es fr hi it ko pt tr zh ja te | ||||
|       custom_container: huggingface/transformers-doc-builder | ||||
|     secrets: | ||||
|       token: ${{ secrets.HUGGINGFACE_PUSH }} | ||||
|       hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} | ||||
|  | ||||
|    build_other_lang: | ||||
|     uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main | ||||
|     with: | ||||
|       commit_sha: ${{ github.sha }} | ||||
|       package: transformers | ||||
|       notebook_folder: transformers_doc | ||||
|       languages: ar de es fr hi it ja ko pt zh | ||||
|       custom_container: huggingface/transformers-doc-builder | ||||
|     secrets: | ||||
|       token: ${{ secrets.HUGGINGFACE_PUSH }} | ||||
|       hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} | ||||
							
								
								
									
										8
									
								
								.github/workflows/check_failed_tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/workflows/check_failed_tests.yml
									
									
									
									
										vendored
									
									
								
							| @ -21,9 +21,6 @@ on: | ||||
|       report_repo_id: | ||||
|         required: true | ||||
|         type: string | ||||
|       commit_sha: | ||||
|         required: false | ||||
|         type: string | ||||
|  | ||||
|  | ||||
| env: | ||||
| @ -35,6 +32,7 @@ env: | ||||
|   # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access. | ||||
|   # This token is created under the bot `hf-transformers-bot`. | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|   CUDA_VISIBLE_DEVICES: 0,1 | ||||
|  | ||||
| @ -43,7 +41,7 @@ jobs: | ||||
|   check_new_failures: | ||||
|     name: " " | ||||
|     runs-on: | ||||
|       group: aws-g5-4xlarge-cache | ||||
|       group: aws-g4dn-4xlarge-cache | ||||
|     container: | ||||
|       image: ${{ inputs.docker }} | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
| @ -89,7 +87,7 @@ jobs: | ||||
|       - name: Update clone | ||||
|         working-directory: /transformers | ||||
|         if: ${{ env.process == 'true' }} | ||||
|         run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }} | ||||
|         run: git fetch && git checkout ${{ github.sha }} | ||||
|  | ||||
|       - name: Get target commit | ||||
|         working-directory: /transformers/utils | ||||
|  | ||||
							
								
								
									
										43
									
								
								.github/workflows/collated-reports.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										43
									
								
								.github/workflows/collated-reports.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,43 +0,0 @@ | ||||
| name: CI collated reports | ||||
|  | ||||
| on: | ||||
|   workflow_call: | ||||
|     inputs: | ||||
|       job: | ||||
|         required: true | ||||
|         type: string | ||||
|       report_repo_id: | ||||
|         required: true | ||||
|         type: string | ||||
|       machine_type: | ||||
|         required: true | ||||
|         type: string | ||||
|       gpu_name: | ||||
|         description: Name of the GPU used for the job. It's enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive. | ||||
|         required: true | ||||
|         type: string | ||||
|  | ||||
| jobs: | ||||
|   collated_reports: | ||||
|     name: Collated reports | ||||
|     runs-on: ubuntu-22.04 | ||||
|     if: always() | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - uses: actions/download-artifact@v4 | ||||
|  | ||||
|       - name: Collated reports | ||||
|         shell: bash | ||||
|         env: | ||||
|           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} | ||||
|           CI_SHA: ${{ github.sha }} | ||||
|           TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }} | ||||
|         run: | | ||||
|           pip install huggingface_hub | ||||
|           python3 utils/collated_reports.py                  \ | ||||
|             --path .                                         \ | ||||
|             --machine-type ${{ inputs.machine_type }}        \ | ||||
|             --commit-hash ${{ env.CI_SHA }}                  \ | ||||
|             --job ${{ inputs.job }}                          \ | ||||
|             --report-repo-id ${{ inputs.report_repo_id }}    \ | ||||
|             --gpu-name ${{ inputs.gpu_name }} | ||||
							
								
								
									
										5
									
								
								.github/workflows/doctest_job.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								.github/workflows/doctest_job.yml
									
									
									
									
										vendored
									
									
								
							| @ -16,6 +16,7 @@ env: | ||||
|   RUN_SLOW: yes | ||||
|   OMP_NUM_THREADS: 16 | ||||
|   MKL_NUM_THREADS: 16 | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|  | ||||
| jobs: | ||||
| @ -27,10 +28,10 @@ jobs: | ||||
|       matrix: | ||||
|         split_keys: ${{ fromJson(inputs.split_keys) }} | ||||
|     runs-on:  | ||||
|       group: aws-g5-4xlarge-cache | ||||
|       group: aws-g4dn-4xlarge-cache | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     steps: | ||||
|       - name: Update clone | ||||
|         working-directory: /transformers | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/workflows/doctests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/doctests.yml
									
									
									
									
										vendored
									
									
								
							| @ -15,10 +15,10 @@ jobs: | ||||
|   setup: | ||||
|     name: Setup | ||||
|     runs-on:  | ||||
|       group: aws-g5-4xlarge-cache | ||||
|       group: aws-g4dn-4xlarge-cache | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     outputs: | ||||
|       job_splits: ${{ steps.set-matrix.outputs.job_splits }} | ||||
|       split_keys: ${{ steps.set-matrix.outputs.split_keys }} | ||||
|  | ||||
							
								
								
									
										157
									
								
								.github/workflows/get-pr-info.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										157
									
								
								.github/workflows/get-pr-info.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,157 +0,0 @@ | ||||
| name: Get PR commit SHA | ||||
| on: | ||||
|   workflow_call: | ||||
|     inputs: | ||||
|       pr_number: | ||||
|         required: true | ||||
|         type: string | ||||
|     outputs: | ||||
|       PR_HEAD_REPO_FULL_NAME: | ||||
|         description: "The full name of the repository from which the pull request is created" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }} | ||||
|       PR_BASE_REPO_FULL_NAME: | ||||
|         description: "The full name of the repository to which the pull request is created" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_FULL_NAME }} | ||||
|       PR_HEAD_REPO_OWNER: | ||||
|         description: "The owner of the repository from which the pull request is created" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }} | ||||
|       PR_BASE_REPO_OWNER: | ||||
|         description: "The owner of the repository to which the pull request is created" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_OWNER }} | ||||
|       PR_HEAD_REPO_NAME: | ||||
|         description: "The name of the repository from which the pull request is created" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_NAME }} | ||||
|       PR_BASE_REPO_NAME: | ||||
|         description: "The name of the repository to which the pull request is created" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_NAME }} | ||||
|       PR_HEAD_REF: | ||||
|         description: "The branch name of the pull request in the head repository" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REF }} | ||||
|       PR_BASE_REF: | ||||
|         description: "The branch name in the base repository (to merge into)" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_BASE_REF }} | ||||
|       PR_HEAD_SHA: | ||||
|         description: "The head sha of the pull request branch in the head repository" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_HEAD_SHA }} | ||||
|       PR_BASE_SHA: | ||||
|         description: "The head sha of the target branch in the base repository" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_BASE_SHA }} | ||||
|       PR_MERGE_COMMIT_SHA: | ||||
|         description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }} | ||||
|       PR_HEAD_COMMIT_DATE: | ||||
|         description: "The date of the head sha of the pull request branch in the head repository" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }} | ||||
|       PR_MERGE_COMMIT_DATE: | ||||
|         description: "The date of the merge commit for the pull request (created by GitHub) in the base repository" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }} | ||||
|       PR_HEAD_COMMIT_TIMESTAMP: | ||||
|         description: "The timestamp of the head sha of the pull request branch in the head repository" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_TIMESTAMP }} | ||||
|       PR_MERGE_COMMIT_TIMESTAMP: | ||||
|         description: "The timestamp of the merge commit for the pull request (created by GitHub) in the base repository" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }} | ||||
|       PR: | ||||
|         description: "The PR" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR }} | ||||
|       PR_FILES: | ||||
|         description: "The files touched in the PR" | ||||
|         value: ${{ jobs.get-pr-info.outputs.PR_FILES }} | ||||
|  | ||||
|  | ||||
| jobs: | ||||
|   get-pr-info: | ||||
|     runs-on: ubuntu-22.04 | ||||
|     name: Get PR commit SHA better | ||||
|     outputs: | ||||
|       PR_HEAD_REPO_FULL_NAME: ${{ steps.pr_info.outputs.head_repo_full_name }} | ||||
|       PR_BASE_REPO_FULL_NAME: ${{ steps.pr_info.outputs.base_repo_full_name }} | ||||
|       PR_HEAD_REPO_OWNER: ${{ steps.pr_info.outputs.head_repo_owner }} | ||||
|       PR_BASE_REPO_OWNER: ${{ steps.pr_info.outputs.base_repo_owner }} | ||||
|       PR_HEAD_REPO_NAME: ${{ steps.pr_info.outputs.head_repo_name }} | ||||
|       PR_BASE_REPO_NAME: ${{ steps.pr_info.outputs.base_repo_name }} | ||||
|       PR_HEAD_REF: ${{ steps.pr_info.outputs.head_ref }} | ||||
|       PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }} | ||||
|       PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }} | ||||
|       PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }} | ||||
|       PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }} | ||||
|       PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }} | ||||
|       PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }} | ||||
|       PR_HEAD_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.head_commit_timestamp }} | ||||
|       PR_MERGE_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.merge_commit_timestamp }} | ||||
|       PR: ${{ steps.pr_info.outputs.pr }} | ||||
|       PR_FILES: ${{ steps.pr_info.outputs.files }} | ||||
|     if: ${{ inputs.pr_number != '' }} | ||||
|     steps: | ||||
|       - name: Extract PR details | ||||
|         id: pr_info | ||||
|         uses: actions/github-script@v6 | ||||
|         with: | ||||
|           script: |             | ||||
|             const { data: pr } = await github.rest.pulls.get({ | ||||
|               owner: context.repo.owner, | ||||
|               repo: context.repo.repo, | ||||
|               pull_number: ${{ inputs.pr_number }} | ||||
|             }); | ||||
|  | ||||
|             const { data: head_commit }  = await github.rest.repos.getCommit({ | ||||
|               owner: pr.head.repo.owner.login, | ||||
|               repo: pr.head.repo.name, | ||||
|               ref: pr.head.ref | ||||
|             }); | ||||
|  | ||||
|             const { data: merge_commit }  = await github.rest.repos.getCommit({ | ||||
|               owner: pr.base.repo.owner.login, | ||||
|               repo: pr.base.repo.name, | ||||
|               ref: pr.merge_commit_sha, | ||||
|             }); | ||||
|  | ||||
|             const { data: files } = await github.rest.pulls.listFiles({ | ||||
|               owner: context.repo.owner, | ||||
|               repo: context.repo.repo, | ||||
|               pull_number: ${{ inputs.pr_number }} | ||||
|             }); | ||||
|  | ||||
|             core.setOutput('head_repo_full_name', pr.head.repo.full_name); | ||||
|             core.setOutput('base_repo_full_name', pr.base.repo.full_name); | ||||
|             core.setOutput('head_repo_owner', pr.head.repo.owner.login); | ||||
|             core.setOutput('base_repo_owner', pr.base.repo.owner.login); | ||||
|             core.setOutput('head_repo_name', pr.head.repo.name); | ||||
|             core.setOutput('base_repo_name', pr.base.repo.name); | ||||
|             core.setOutput('head_ref', pr.head.ref); | ||||
|             core.setOutput('base_ref', pr.base.ref); | ||||
|             core.setOutput('head_sha', pr.head.sha); | ||||
|             core.setOutput('base_sha', pr.base.sha); | ||||
|             core.setOutput('merge_commit_sha', pr.merge_commit_sha); | ||||
|             core.setOutput('pr', pr); | ||||
|  | ||||
|             core.setOutput('head_commit_date', head_commit.commit.committer.date); | ||||
|             core.setOutput('merge_commit_date', merge_commit.commit.committer.date); | ||||
|              | ||||
|             core.setOutput('files', files);             | ||||
|              | ||||
|             console.log('PR head commit:', { | ||||
|               head_commit: head_commit, | ||||
|               commit: head_commit.commit, | ||||
|               date: head_commit.commit.committer.date | ||||
|             }); | ||||
|  | ||||
|             console.log('PR merge commit:', { | ||||
|               merge_commit: merge_commit, | ||||
|               commit: merge_commit.commit, | ||||
|               date: merge_commit.commit.committer.date | ||||
|             }); | ||||
|  | ||||
|       - name: Convert dates to timestamps | ||||
|         id: get_timestamps | ||||
|         run: | | ||||
|           head_commit_date=${{ steps.pr_info.outputs.head_commit_date }} | ||||
|           merge_commit_date=${{ steps.pr_info.outputs.merge_commit_date }} | ||||
|           echo $head_commit_date | ||||
|           echo $merge_commit_date | ||||
|           head_commit_timestamp=$(date -d "$head_commit_date" +%s) | ||||
|           merge_commit_timestamp=$(date -d "$merge_commit_date" +%s) | ||||
|           echo $head_commit_timestamp | ||||
|           echo $merge_commit_timestamp | ||||
|           echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT | ||||
|           echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT | ||||
							
								
								
									
										36
									
								
								.github/workflows/get-pr-number.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										36
									
								
								.github/workflows/get-pr-number.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,36 +0,0 @@ | ||||
| name: Get PR number | ||||
| # Reusable workflow (`workflow_call`): derives the PR number from the triggering event payload | ||||
| # and exposes it to the caller as the `PR_NUMBER` output (empty when no PR number is found). | ||||
| on: | ||||
|   workflow_call: | ||||
|     outputs: | ||||
|       PR_NUMBER: | ||||
|         description: "The extracted PR number" | ||||
|         value: ${{ jobs.get-pr-number.outputs.PR_NUMBER }} | ||||
|  | ||||
| jobs: | ||||
|   get-pr-number: | ||||
|     runs-on: ubuntu-22.04 | ||||
|     name: Get PR number | ||||
|     outputs: | ||||
|       PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }} | ||||
|     steps: | ||||
|       # The branches are tried in order and the first match wins: | ||||
|       #   1. an `issue` payload that also carries `pull_request` (e.g. a comment made on a PR), | ||||
|       #   2. a `pull_request` payload with a `number`, | ||||
|       #   3. a `pull_request` payload without its own `number`: fall back to the event-level `number`, | ||||
|       #   4. otherwise `PR_NUMBER` is set to the empty string. | ||||
|       # The result is stored in the job environment (`GITHUB_ENV`) for the following steps. | ||||
|       - name: Get PR number | ||||
|         shell: bash | ||||
|         run: | | ||||
|           if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then | ||||
|             echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV | ||||
|           elif [[ "${{ github.event.pull_request.number }}" != "" ]]; then | ||||
|             echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV | ||||
|           elif [[ "${{ github.event.pull_request }}" != "" ]]; then | ||||
|             echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_ENV | ||||
|           else | ||||
|             echo "PR_NUMBER=" >> $GITHUB_ENV | ||||
|           fi | ||||
|  | ||||
|       # Only prints the value to the job log. | ||||
|       - name: Check PR number | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "${{ env.PR_NUMBER }}" | ||||
|  | ||||
|       # Copies the environment value into a step output so the job (and then the workflow) can expose it. | ||||
|       - name: Set PR number | ||||
|         id: set_pr_number | ||||
|         run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT" | ||||
							
								
								
									
										72
									
								
								.github/workflows/model_jobs.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										72
									
								
								.github/workflows/model_jobs.yml
									
									
									
									
										vendored
									
									
								
							| @ -12,22 +12,16 @@ on: | ||||
|       slice_id: | ||||
|         required: true | ||||
|         type: number | ||||
|       runner_map: | ||||
|         required: false | ||||
|         type: string | ||||
|       docker: | ||||
|         required: true | ||||
|         type: string | ||||
|       commit_sha: | ||||
|         required: false | ||||
|         type: string | ||||
|       report_name_prefix: | ||||
|         required: false | ||||
|         default: run_models_gpu | ||||
|         type: string | ||||
|       runner_type: | ||||
|         required: false | ||||
|         type: string | ||||
|       report_repo_id: | ||||
|         required: false | ||||
|         type: string | ||||
|  | ||||
| env: | ||||
|   HF_HOME: /mnt/cache | ||||
| @ -38,6 +32,7 @@ env: | ||||
|   # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access. | ||||
|   # This token is created under the bot `hf-transformers-bot`. | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|   CUDA_VISIBLE_DEVICES: 0,1 | ||||
|  | ||||
| @ -50,12 +45,10 @@ jobs: | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }} | ||||
|     runs-on: | ||||
|       group: '${{ inputs.machine_type }}' | ||||
|       group: ${{ fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type] }} | ||||
|     container: | ||||
|       image: ${{ inputs.docker }} | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     outputs: | ||||
|       machine_type: ${{ steps.set_machine_type.outputs.machine_type }} | ||||
|     steps: | ||||
|       - name: Echo input and matrix info | ||||
|         shell: bash | ||||
| @ -77,7 +70,7 @@ jobs: | ||||
|  | ||||
|       - name: Update clone | ||||
|         working-directory: /transformers | ||||
|         run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }} | ||||
|         run: git fetch && git checkout ${{ github.sha }} | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /transformers | ||||
| @ -109,15 +102,14 @@ jobs: | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Set `machine_type` for report and artifact names | ||||
|         id: set_machine_type | ||||
|         working-directory: /transformers | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "${{ inputs.machine_type }}" | ||||
|  | ||||
|           if [ "${{ inputs.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ inputs.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ inputs.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ inputs.machine_type }} | ||||
| @ -125,58 +117,26 @@ jobs: | ||||
|  | ||||
|           echo "$machine_type" | ||||
|           echo "machine_type=$machine_type" >> $GITHUB_ENV | ||||
|           echo "machine_type=$machine_type" >> $GITHUB_OUTPUT | ||||
|  | ||||
|       - name: Create report directory if it doesn't exist | ||||
|         shell: bash | ||||
|         run: | | ||||
|           mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports | ||||
|           echo "dummy" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/dummy.txt | ||||
|           ls -la /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports | ||||
|  | ||||
|       - name: Run all tests on GPU | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt | ||||
|           ls -la | ||||
|           # Extract the exit code from the output file | ||||
|           EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2) | ||||
|           exit ${EXIT_CODE:-1} | ||||
|         run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} | ||||
|  | ||||
|       - name: Failure short reports | ||||
|         if: ${{ failure() }} | ||||
|         # This step is only to show information on Github Actions log. | ||||
|         # Always mark this step as successful, even if the report directory or the file `failures_short.txt` in it doesn't exist | ||||
|         continue-on-error: true | ||||
|         run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/failures_short.txt | ||||
|         run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt | ||||
|  | ||||
|       - name: Captured information | ||||
|         if: ${{ failure() }} | ||||
|         continue-on-error: true | ||||
|       - name: Run test | ||||
|         shell: bash | ||||
|         run: | | ||||
|           cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/captured_info.txt | ||||
|  | ||||
|       - name: Copy test_outputs.txt | ||||
|         if: ${{ always() }} | ||||
|         continue-on-error: true | ||||
|         run: | | ||||
|           cp /transformers/test_outputs.txt /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports | ||||
|           mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports | ||||
|           echo "hello" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt | ||||
|           echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports" | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports | ||||
|           path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports | ||||
|  | ||||
|   collated_reports: | ||||
|     name: Collated Reports | ||||
|     if: ${{ always() }} | ||||
|     needs: run_models_gpu | ||||
|     uses: huggingface/transformers/.github/workflows/collated-reports.yml@main | ||||
|     with: | ||||
|       job: run_models_gpu | ||||
|       report_repo_id: ${{ inputs.report_repo_id }} | ||||
|       gpu_name: ${{ inputs.runner_type }} | ||||
|       machine_type: ${{ needs.run_models_gpu.outputs.machine_type }} | ||||
|     secrets: inherit | ||||
|           path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports | ||||
|  | ||||
							
								
								
									
										1
									
								
								.github/workflows/model_jobs_intel_gaudi.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.github/workflows/model_jobs_intel_gaudi.yml
									
									
									
									
										vendored
									
									
								
							| @ -26,6 +26,7 @@ env: | ||||
|   TRANSFORMERS_IS_CI: yes | ||||
|   PT_ENABLE_INT64_SUPPORT: 1 | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   HF_HOME: /mnt/cache/.cache/huggingface | ||||
|  | ||||
| jobs: | ||||
|  | ||||
							
								
								
									
										134
									
								
								.github/workflows/pr_build_doc_with_comment.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										134
									
								
								.github/workflows/pr_build_doc_with_comment.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,134 +0,0 @@ | ||||
| name: PR - build doc via comment | ||||
| # Triggered when a comment is created; the jobs below only proceed for comments starting with `build-doc`. | ||||
| on: | ||||
|   issue_comment: | ||||
|     types: | ||||
|       - created | ||||
|     branches-ignore: | ||||
|       - main | ||||
| # One run per (workflow, issue/PR number, "comment starts with build-doc" flag); a newer run in the same | ||||
| # group cancels the one still in progress. | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'build-doc') }} | ||||
|   cancel-in-progress: true | ||||
| # No permissions at the workflow level; each job declares the ones it needs. | ||||
| permissions: {} | ||||
|  | ||||
|  | ||||
| jobs: | ||||
|   # Gatekeeper job: runs only when the issue/PR is open, the actor is in the hard-coded allow-list below, | ||||
|   # and the comment body starts with `build-doc`. The PR number comes from the local reusable workflow. | ||||
|   get-pr-number: | ||||
|     name: Get PR number | ||||
|     if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'build-doc')) }} | ||||
|     uses: ./.github/workflows/get-pr-number.yml | ||||
|  | ||||
|   # Calls the local reusable workflow `get-pr-info.yml` for the PR number found above. | ||||
|   # Skipped when no PR number was extracted. | ||||
|   get-pr-info: | ||||
|     name: Get PR commit SHA | ||||
|     needs: get-pr-number | ||||
|     if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}} | ||||
|     uses: ./.github/workflows/get-pr-info.yml | ||||
|     with: | ||||
|       pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }} | ||||
|  | ||||
|   # Fails when the PR merge commit is not older than the triggering comment, i.e. when commits were | ||||
|   # pushed after (or at the same second as) the `build-doc` comment. | ||||
|   verity_pr_commit: | ||||
|     name: Verity PR commit corresponds to a specific event by comparing timestamps | ||||
|     if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}} | ||||
|     runs-on: ubuntu-22.04 | ||||
|     # `get-pr-number` has to be listed as a direct dependency: the `needs` context only contains the jobs | ||||
|     # named here, so without it the `if` above always compares an empty value and the job never runs. | ||||
|     needs: [get-pr-number, get-pr-info] | ||||
|     env: | ||||
|       COMMENT_DATE: ${{ github.event.comment.created_at }} | ||||
|       PR_MERGE_COMMIT_DATE: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }} | ||||
|       PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }} | ||||
|     steps: | ||||
|       - run: | | ||||
|           COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s") | ||||
|           echo "COMMENT_DATE: $COMMENT_DATE" | ||||
|           echo "PR_MERGE_COMMIT_DATE: $PR_MERGE_COMMIT_DATE" | ||||
|           echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP" | ||||
|           echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP" | ||||
|           # Operands are quoted so an empty value cannot change how the test expression is parsed. | ||||
|           if [ "$COMMENT_TIMESTAMP" -le "$PR_MERGE_COMMIT_TIMESTAMP" ]; then | ||||
|             echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!"; | ||||
|             exit 1; | ||||
|           fi | ||||
|  | ||||
|   # Posts a `pending` commit status (context `custom-doc-build`) on the PR head commit, linking to this run. | ||||
|   create_run: | ||||
|     name: Create run | ||||
|     needs: [get-pr-number, get-pr-info] | ||||
|     if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }} | ||||
|     # Only the permission needed to write commit statuses. | ||||
|     permissions: | ||||
|       statuses: write | ||||
|     runs-on: ubuntu-22.04 | ||||
|     steps: | ||||
|       - name: Create Run | ||||
|         id: create_run | ||||
|         env: | ||||
|           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|           # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`. | ||||
|           # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status | ||||
|           GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} | ||||
|         run: | | ||||
|           gh api \ | ||||
|             --method POST \ | ||||
|             -H "Accept: application/vnd.github+json" \ | ||||
|             -H "X-GitHub-Api-Version: 2022-11-28" \ | ||||
|             repos/${{ github.repository }}/statuses/${{ needs.get-pr-info.outputs.PR_HEAD_SHA }} \ | ||||
|             -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Custom doc building job" -f "context=custom-doc-build" | ||||
|  | ||||
|   # Once the pending status has been created, posts a comment on the PR with a link to this workflow run. | ||||
|   reply_to_comment: | ||||
|     name: Reply to the comment | ||||
|     if: ${{ needs.create_run.result == 'success' }} | ||||
|     needs: [get-pr-number, create_run] | ||||
|     # Only the permission needed to comment on the pull request. | ||||
|     permissions: | ||||
|       pull-requests: write | ||||
|     runs-on: ubuntu-22.04 | ||||
|     steps: | ||||
|       - name: Reply to the comment | ||||
|         env: | ||||
|           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|           GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} | ||||
|         run: | | ||||
|           gh api \ | ||||
|             --method POST \ | ||||
|             -H "Accept: application/vnd.github+json" \ | ||||
|             -H "X-GitHub-Api-Version: 2022-11-28" \ | ||||
|             repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \ | ||||
|             -f "body=[Building docs for all languages...](${{ env.GITHUB_RUN_URL }})" | ||||
|  | ||||
|   # Delegates the documentation build to the reusable workflow in `huggingface/doc-builder` (pinned to `main`), | ||||
|   # for the PR head commit and the languages listed below. | ||||
|   build-doc: | ||||
|     name: Build doc | ||||
|     needs: [get-pr-number, get-pr-info] | ||||
|     if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }} | ||||
|     uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main | ||||
|     with: | ||||
|       commit_sha: ${{ needs.get-pr-info.outputs.PR_HEAD_SHA }} | ||||
|       pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }} | ||||
|       package: transformers | ||||
|       languages: ar de en es fr hi it ko pt tr zh ja te | ||||
|  | ||||
|   # Replaces the `pending` commit status created by `create_run` with the final result of `build-doc`. | ||||
|   update_run_status: | ||||
|     name: Update Check Run Status | ||||
|     needs: [ get-pr-info, create_run, build-doc ] | ||||
|     permissions: | ||||
|       statuses: write | ||||
|     # Run even when `build-doc` failed or was cancelled, but only if the pending status was actually created. | ||||
|     if: ${{ always() && needs.create_run.result == 'success' }} | ||||
|     runs-on: ubuntu-22.04 | ||||
|     env: | ||||
|       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|       GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} | ||||
|       # This must inspect `build-doc`: `create_run` is always `success` at this point (see the `if` above), | ||||
|       # so testing it would report every doc build as successful, including failed ones. | ||||
|       STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.build-doc.result) }} | ||||
|     steps: | ||||
|       - name: Get `build-doc` job status | ||||
|         run: | | ||||
|           echo "${{ needs.build-doc.result }}" | ||||
|           echo $STATUS_OK | ||||
|           if [ "$STATUS_OK" = "true" ]; then | ||||
|             echo "STATUS=success" >> $GITHUB_ENV | ||||
|           else | ||||
|             echo "STATUS=failure" >> $GITHUB_ENV | ||||
|           fi | ||||
|  | ||||
|       # Same status context (`custom-doc-build`) as in `create_run`, so the pending entry is superseded. | ||||
|       - name: Update PR commit statuses | ||||
|         run: | | ||||
|           echo "${{ needs.build-doc.result }}" | ||||
|           echo "${{ env.STATUS }}" | ||||
|           gh api \ | ||||
|             --method POST \ | ||||
|             -H "Accept: application/vnd.github+json" \ | ||||
|             -H "X-GitHub-Api-Version: 2022-11-28" \ | ||||
|             repos/${{ github.repository }}/statuses/${{ needs.get-pr-info.outputs.PR_HEAD_SHA }} \ | ||||
|             -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Custom doc building job" -f "context=custom-doc-build" | ||||
							
								
								
									
										177
									
								
								.github/workflows/pr_run_slow_ci.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										177
									
								
								.github/workflows/pr_run_slow_ci.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,177 +0,0 @@ | ||||
| name: PR slow CI | ||||
| # Triggered when a pull request is opened, receives new commits (`synchronize`), or is reopened. | ||||
| on: | ||||
|   pull_request_target: | ||||
|     types: [opened, synchronize, reopened] | ||||
|  | ||||
| jobs: | ||||
|   # Extracts the PR number through the local reusable workflow. | ||||
|   get-pr-number: | ||||
|     name: Get PR number | ||||
|     uses: ./.github/workflows/get-pr-number.yml | ||||
|  | ||||
|   # Calls the local reusable workflow `get-pr-info.yml` for the PR number found above. | ||||
|   # Skipped when no PR number was extracted. | ||||
|   get-pr-info: | ||||
|     name: Get PR commit SHA | ||||
|     needs: get-pr-number | ||||
|     if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}} | ||||
|     uses: ./.github/workflows/get-pr-info.yml | ||||
|     with: | ||||
|       pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }} | ||||
|  | ||||
|   # Works out which slow CI jobs to suggest for this PR. The PR's file list and the directory listings of | ||||
|   # `tests`, `tests/models` and `tests/quantization` (at the PR head commit) are written to text files, | ||||
|   # then `utils/get_pr_run_slow_jobs.py` is run and the last line it prints becomes the `jobs` output. | ||||
|   get-jobs: | ||||
|     name: Get test files to run | ||||
|     runs-on: ubuntu-22.04 | ||||
|     needs: [get-pr-number, get-pr-info] | ||||
|     outputs: | ||||
|       jobs: ${{ steps.get_jobs.outputs.jobs_to_run }} | ||||
|     steps: | ||||
|       # Directory listings are fetched through the API from the PR head repository/commit. | ||||
|       # Uses the same `actions/github-script` major version as the `send_comment` job of this workflow. | ||||
|       - name: Get repository content | ||||
|         id: repo_content | ||||
|         uses: actions/github-script@v7 | ||||
|         with: | ||||
|           script: | | ||||
|             const { data: tests_dir } = await github.rest.repos.getContent({ | ||||
|               owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}', | ||||
|               repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}', | ||||
|               path: 'tests', | ||||
|               ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}', | ||||
|             }); | ||||
|  | ||||
|             const { data: tests_models_dir } = await github.rest.repos.getContent({ | ||||
|               owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}', | ||||
|               repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}', | ||||
|               path: 'tests/models', | ||||
|               ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}', | ||||
|             }); | ||||
|  | ||||
|             const { data: tests_quantization_dir } = await github.rest.repos.getContent({ | ||||
|               owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}', | ||||
|               repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}', | ||||
|               path: 'tests/quantization', | ||||
|               ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}', | ||||
|             }); | ||||
|  | ||||
|             core.setOutput('tests_dir', tests_dir); | ||||
|             core.setOutput('tests_models_dir', tests_models_dir); | ||||
|             core.setOutput('tests_quantization_dir', tests_quantization_dir); | ||||
|  | ||||
|       # This checks out the main branch | ||||
|       - uses: actions/checkout@v4 | ||||
|         with: | ||||
|           fetch-depth: "0" | ||||
|  | ||||
|       # The quoted heredoc delimiter ('EOF') keeps the shell from expanding anything inside the written text. | ||||
|       - name: Write pr_files file | ||||
|         run: | | ||||
|           cat > pr_files.txt << 'EOF' | ||||
|           ${{ needs.get-pr-info.outputs.PR_FILES }} | ||||
|           EOF | ||||
|  | ||||
|       - name: Write tests_dir file | ||||
|         run: | | ||||
|           cat > tests_dir.txt << 'EOF' | ||||
|           ${{ steps.repo_content.outputs.tests_dir }} | ||||
|           EOF | ||||
|  | ||||
|       - name: Write tests_models_dir file | ||||
|         run: | | ||||
|           cat > tests_models_dir.txt << 'EOF' | ||||
|           ${{ steps.repo_content.outputs.tests_models_dir }} | ||||
|           EOF | ||||
|  | ||||
|       - name: Write tests_quantization_dir file | ||||
|         run: | | ||||
|           cat > tests_quantization_dir.txt << 'EOF' | ||||
|           ${{ steps.repo_content.outputs.tests_quantization_dir }} | ||||
|           EOF | ||||
|  | ||||
|       # Only the last line printed by the script is used as the job list. | ||||
|       - name: Run script to get jobs to run | ||||
|         id: get_jobs | ||||
|         run: | | ||||
|           python utils/get_pr_run_slow_jobs.py | tee output.txt | ||||
|           echo "jobs_to_run: $(tail -n 1 output.txt)" | ||||
|           echo "jobs_to_run=$(tail -n 1 output.txt)" >> "$GITHUB_OUTPUT" | ||||
|  | ||||
|   send_comment: | ||||
|     # Will delete the previous comment and send a new one if: | ||||
|     #   - either the content is changed | ||||
|     #   - or the previous comment is 30 minutes or more old | ||||
|     name: Send a comment to suggest jobs to run | ||||
|     if: ${{ needs.get-jobs.outputs.jobs != '' }} | ||||
|     needs: [get-pr-number, get-jobs] | ||||
|     permissions: | ||||
|       pull-requests: write | ||||
|     runs-on: ubuntu-22.04 | ||||
|     steps: | ||||
|       - name: Check and update comment if needed | ||||
|         uses: actions/github-script@v7 | ||||
|         env: | ||||
|           BODY: "\n\nrun-slow: ${{ needs.get-jobs.outputs.jobs }}" | ||||
|         with: | ||||
|           script: | | ||||
|             const prNumber = ${{ needs.get-pr-number.outputs.PR_NUMBER }}; | ||||
|             const commentPrefix = "**[For maintainers]** Suggested jobs to run (before merge)"; | ||||
|             const thirtyMinutesAgo = new Date(Date.now() - 30 * 60 * 1000); // 30 minutes ago | ||||
|             const newBody = `${commentPrefix}${process.env.BODY}`; | ||||
|  | ||||
|             // Get all comments on the PR. A single `listComments` call only returns the first page, | ||||
|             // so paginate: otherwise an earlier bot comment on a long thread is missed and duplicated. | ||||
|             const comments = await github.paginate(github.rest.issues.listComments, { | ||||
|               owner: context.repo.owner, | ||||
|               repo: context.repo.repo, | ||||
|               issue_number: prNumber, | ||||
|               per_page: 100 | ||||
|             }); | ||||
|  | ||||
|             // Find existing comments that start with our prefix | ||||
|             const existingComments = comments.filter(comment => | ||||
|               comment.user.login === 'github-actions[bot]' && | ||||
|               comment.body.startsWith(commentPrefix) | ||||
|             ); | ||||
|  | ||||
|             let shouldCreateNewComment = true; | ||||
|             let commentsToDelete = []; | ||||
|  | ||||
|             if (existingComments.length > 0) { | ||||
|               // Get the most recent comment | ||||
|               const mostRecentComment = existingComments | ||||
|                 .sort((a, b) => new Date(b.created_at) - new Date(a.created_at))[0]; | ||||
|  | ||||
|               const commentDate = new Date(mostRecentComment.created_at); | ||||
|               const isOld = commentDate < thirtyMinutesAgo; | ||||
|               const isDifferentContent = mostRecentComment.body !== newBody; | ||||
|  | ||||
|               console.log(`Most recent comment created: ${mostRecentComment.created_at}`); | ||||
|               console.log(`Is older than 30 minutes: ${isOld}`); | ||||
|               console.log(`Has different content: ${isDifferentContent}`); | ||||
|  | ||||
|               if (isOld || isDifferentContent) { | ||||
|                 // Delete all existing comments and create new one | ||||
|                 commentsToDelete = existingComments; | ||||
|                 console.log(`Will delete ${commentsToDelete.length} existing comment(s) and create new one`); | ||||
|               } else { | ||||
|                 // Content is same and comment is recent, skip | ||||
|                 shouldCreateNewComment = false; | ||||
|                 console.log('Comment is recent and content unchanged, skipping update'); | ||||
|               } | ||||
|             } else { | ||||
|               console.log('No existing comments found, will create new one'); | ||||
|             } | ||||
|  | ||||
|             // Delete old comments if needed | ||||
|             for (const comment of commentsToDelete) { | ||||
|               console.log(`Deleting comment #${comment.id} (created: ${comment.created_at})`); | ||||
|               await github.rest.issues.deleteComment({ | ||||
|                 owner: context.repo.owner, | ||||
|                 repo: context.repo.repo, | ||||
|                 comment_id: comment.id | ||||
|               }); | ||||
|             } | ||||
|  | ||||
|             // Create new comment if needed | ||||
|             if (shouldCreateNewComment) { | ||||
|               await github.rest.issues.createComment({ | ||||
|                 owner: context.repo.owner, | ||||
|                 repo: context.repo.repo, | ||||
|                 issue_number: prNumber, | ||||
|                 body: newBody | ||||
|               }); | ||||
|               console.log('✅ New comment created'); | ||||
|             } else { | ||||
|               console.log('ℹ️ No comment update needed'); | ||||
|             } | ||||
							
								
								
									
										250
									
								
								.github/workflows/push-important-models.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										250
									
								
								.github/workflows/push-important-models.yml
									
									
									
									
										vendored
									
									
								
							| @ -4,6 +4,17 @@ on: | ||||
|   push: | ||||
|     branches: [ main ] | ||||
|  | ||||
| env: | ||||
|   OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA" | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|   HF_HOME: /mnt/cache | ||||
|   TRANSFORMERS_IS_CI: yes | ||||
|   OMP_NUM_THREADS: 8 | ||||
|   MKL_NUM_THREADS: 8 | ||||
|   RUN_SLOW: yes # About `HF_HUB_READ_TOKEN`: for gated repositories, we still need to agree to share information on the Hub repo page in order to get access. This token is created under the bot `hf-transformers-bot`. | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|  | ||||
| jobs: | ||||
|   get_modified_models: | ||||
|     name: "Get all modified files" | ||||
| @ -14,144 +25,111 @@ jobs: | ||||
|       - name: Check out code | ||||
|         uses: actions/checkout@v4 | ||||
|  | ||||
|       - name: Get changed files using `actions/github-script` | ||||
|         id: get-changed-files | ||||
|         uses: actions/github-script@v7 | ||||
|       - name: Get changed files | ||||
|         id: changed-files | ||||
|         uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c | ||||
|         with: | ||||
|           script: | | ||||
|             let files = []; | ||||
|              | ||||
|             // Only handle push events | ||||
|             if (context.eventName === 'push') { | ||||
|               const afterSha = context.payload.after; | ||||
|               const branchName = context.payload.ref.replace('refs/heads/', ''); | ||||
|                | ||||
|               let baseSha; | ||||
|                | ||||
|               if (branchName === 'main') { | ||||
|                 console.log('Push to main branch, comparing to parent commit'); | ||||
|                 // Get the parent commit of the pushed commit | ||||
|                 const { data: commit } = await github.rest.repos.getCommit({ | ||||
|                   owner: context.repo.owner, | ||||
|                   repo: context.repo.repo, | ||||
|                   ref: afterSha | ||||
|                 }); | ||||
|                 baseSha = commit.parents[0]?.sha; | ||||
|                 if (!baseSha) { | ||||
|                   throw new Error('No parent commit found for the pushed commit'); | ||||
|                 } | ||||
|               } else { | ||||
|                 console.log(`Push to branch ${branchName}, comparing to main`); | ||||
|                 baseSha = 'main'; | ||||
|               } | ||||
|                | ||||
|               const { data: comparison } = await github.rest.repos.compareCommits({ | ||||
|                 owner: context.repo.owner, | ||||
|                 repo: context.repo.repo, | ||||
|                 base: baseSha, | ||||
|                 head: afterSha | ||||
|               }); | ||||
|                | ||||
|               // Include added, modified, and renamed files | ||||
|               files = comparison.files | ||||
|                 .filter(file => file.status === 'added' || file.status === 'modified' || file.status === 'renamed') | ||||
|                 .map(file => file.filename); | ||||
|             } | ||||
|              | ||||
|             // Include all files under src/transformers/ (not just models subdirectory) | ||||
|             const filteredFiles = files.filter(file =>  | ||||
|               file.startsWith('src/transformers/') | ||||
|             ); | ||||
|              | ||||
|             core.setOutput('changed_files', filteredFiles.join(' ')); | ||||
|             core.setOutput('any_changed', filteredFiles.length > 0 ? 'true' : 'false'); | ||||
|           files: src/transformers/models/** | ||||
|  | ||||
|       - name: Parse changed files with Python | ||||
|         if: steps.get-changed-files.outputs.any_changed == 'true' | ||||
|         env: | ||||
|           CHANGED_FILES: ${{ steps.get-changed-files.outputs.changed_files }} | ||||
|       - name: Run step if only the files listed above change | ||||
|         if: steps.changed-files.outputs.any_changed == 'true' | ||||
|         id: set-matrix | ||||
|         env: | ||||
|           ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} | ||||
|         run: | | ||||
|           python3 - << 'EOF' | ||||
|           import os | ||||
|           import sys | ||||
|           import json | ||||
|            | ||||
|           # Add the utils directory to Python path | ||||
|           sys.path.insert(0, 'utils') | ||||
|            | ||||
|           # Import the important models list | ||||
|           from important_files import IMPORTANT_MODELS | ||||
|            | ||||
|           print(f"Important models: {IMPORTANT_MODELS}") | ||||
|            | ||||
|           # Get the changed files from the previous step | ||||
|           changed_files_str = os.environ.get('CHANGED_FILES', '') | ||||
|           changed_files = changed_files_str.split() if changed_files_str else [] | ||||
|            | ||||
|           # Filter to only Python files | ||||
|           python_files = [f for f in changed_files if f.endswith('.py')] | ||||
|           print(f"Python files changed: {python_files}") | ||||
|            | ||||
|           result_models = set() | ||||
|            | ||||
|           # Specific files that trigger all models | ||||
|           transformers_utils_files = [ | ||||
|               'modeling_utils.py', | ||||
|               'modeling_rope_utils.py',  | ||||
|               'modeling_flash_attention_utils.py', | ||||
|               'modeling_attn_mask_utils.py', | ||||
|               'cache_utils.py', | ||||
|               'masking_utils.py', | ||||
|               'pytorch_utils.py' | ||||
|           ] | ||||
|            | ||||
|           # Single loop through all Python files | ||||
|           for file in python_files: | ||||
|               # Check for files under src/transformers/models/ | ||||
|               if file.startswith('src/transformers/models/'): | ||||
|                   remaining_path = file[len('src/transformers/models/'):] | ||||
|                   if '/' in remaining_path: | ||||
|                       model_dir = remaining_path.split('/')[0] | ||||
|                       if model_dir in IMPORTANT_MODELS: | ||||
|                           result_models.add(model_dir) | ||||
|                           print(f"Added model directory: {model_dir}") | ||||
|                | ||||
|               # Check for specific files under src/transformers/ or src/transformers/generation/ files | ||||
|               elif file.startswith('src/transformers/generation/') or \ | ||||
|                    (file.startswith('src/transformers/') and os.path.basename(file) in transformers_utils_files): | ||||
|                   print(f"Found core file: {file} - including all important models") | ||||
|                   result_models.update(IMPORTANT_MODELS) | ||||
|                   break  # No need to continue once we include all models | ||||
|            | ||||
|           # Convert to sorted list and create matrix | ||||
|           result_list = sorted(list(result_models)) | ||||
|           print(f"Final model list: {result_list}") | ||||
|            | ||||
|           if result_list: | ||||
|               matrix_json = json.dumps(result_list) | ||||
|               print(f"matrix={matrix_json}") | ||||
|                | ||||
|               # Write to GITHUB_OUTPUT | ||||
|               with open(os.environ['GITHUB_OUTPUT'], 'a') as f: | ||||
|                   f.write(f"matrix={matrix_json}\n") | ||||
|           else: | ||||
|               print("matrix=[]") | ||||
|               with open(os.environ['GITHUB_OUTPUT'], 'a') as f: | ||||
|                   f.write("matrix=[]\n") | ||||
|           EOF | ||||
|  | ||||
|   model-ci: | ||||
|     name: Model CI | ||||
|     uses: ./.github/workflows/self-scheduled.yml | ||||
|             model_arrays=() | ||||
|             for file in $ALL_CHANGED_FILES; do | ||||
|                 model_path="${file#*models/}" | ||||
|                 model_path="models/${model_path%%/*}" | ||||
|                 if grep -qFx "$model_path" utils/important_models.txt; then | ||||
|                     # Append the file to the matrix string | ||||
|                     model_arrays+=("$model_path") | ||||
|                 fi | ||||
|             done | ||||
|             matrix_string=$(printf '"%s", ' "${model_arrays[@]}" | sed 's/, $//') | ||||
|             echo "matrix=[$matrix_string]" >> $GITHUB_OUTPUT | ||||
|   test_modified_files: | ||||
|     needs: get_modified_models | ||||
|     if: needs.get_modified_models.outputs.matrix != '' && needs.get_modified_models.outputs.matrix != '[]' | ||||
|     with: | ||||
|       job: run_models_gpu | ||||
|       slack_report_channel: "#transformers-ci-push" | ||||
|       docker: huggingface/transformers-all-latest-gpu | ||||
|       ci_event: push | ||||
|       report_repo_id: hf-internal-testing/transformers_ci_push | ||||
|       commit_sha: ${{ github.sha }} | ||||
|       models: ${{ needs.get_modified_models.outputs.matrix }} | ||||
|     secrets: inherit | ||||
|     name: Slow & FA2 tests | ||||
|     runs-on: | ||||
|       group: aws-g5-4xlarge-cache | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu | ||||
|       options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }} | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }} | ||||
|  | ||||
|     steps: | ||||
|       - name: Check out code | ||||
|         uses: actions/checkout@v4 | ||||
|  | ||||
|       - name: Install locally transformers & other libs | ||||
|         run: | | ||||
|           apt install sudo | ||||
|           sudo -H pip install --upgrade pip | ||||
|           sudo -H pip uninstall -y transformers | ||||
|           sudo -H pip install -U -e ".[testing]" | ||||
|           MAX_JOBS=4 pip install flash-attn --no-build-isolation | ||||
|           pip install bitsandbytes | ||||
|  | ||||
|       - name: NVIDIA-SMI | ||||
|         run: | | ||||
|           nvidia-smi | ||||
|  | ||||
|       - name: Show installed libraries and their versions | ||||
|         run: pip freeze | ||||
|  | ||||
|       - name: Run FA2 tests | ||||
|         id: run_fa2_tests | ||||
|         run: | ||||
|           pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_* | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ matrix.model-name }}_fa2_tests | ||||
|           path: /transformers/reports/${{ matrix.model-name }}_fa2_tests | ||||
|  | ||||
|       - name: Post to Slack | ||||
|         if: always() | ||||
|         uses: huggingface/hf-workflows/.github/actions/post-slack@main | ||||
|         with: | ||||
|           slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }} | ||||
|           title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }} | ||||
|           status: ${{ steps.run_fa2_tests.conclusion}} | ||||
|           slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }} | ||||
|  | ||||
|       - name: Run integration tests | ||||
|         id: run_integration_tests | ||||
|         if: always() | ||||
|         run: | ||||
|           pytest -rsfE -k "IntegrationTest"  --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_* | ||||
|  | ||||
|       - name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: tests_integration_${{ matrix.model-name }} | ||||
|           path: /transformers/reports/tests_integration_${{ matrix.model-name }} | ||||
|  | ||||
|       - name: Post to Slack | ||||
|         if: always() | ||||
|         uses: huggingface/hf-workflows/.github/actions/post-slack@main | ||||
|         with: | ||||
|           slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }} | ||||
|           title: 🤗 Results of the Integration tests - ${{ matrix.model-name }} | ||||
|           status: ${{ steps.run_integration_tests.conclusion}} | ||||
|           slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }} | ||||
|  | ||||
|       - name: Tailscale # In order to be able to SSH when a test fails | ||||
|         if: ${{ runner.debug == '1'}} | ||||
|         uses: huggingface/tailscale-action@v1 | ||||
|         with: | ||||
|           authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }} | ||||
|           slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }} | ||||
|           slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} | ||||
|           waitForSSH: true | ||||
|  | ||||
							
								
								
									
										15
									
								
								.github/workflows/self-comment-ci.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								.github/workflows/self-comment-ci.yml
									
									
									
									
										vendored
									
									
								
							| @ -20,6 +20,7 @@ env: | ||||
|   # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. | ||||
|   # This token is created under the bot `hf-transformers-bot`. | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|   CUDA_VISIBLE_DEVICES: 0,1 | ||||
|  | ||||
| @ -28,7 +29,7 @@ jobs: | ||||
|     runs-on: ubuntu-22.04 | ||||
|     name: Get PR number | ||||
|     # For security: only allow team members to run | ||||
|     if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }} | ||||
|     if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu", "ivarflakstad"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }} | ||||
|     outputs: | ||||
|       PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }} | ||||
|     steps: | ||||
| @ -184,7 +185,7 @@ jobs: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(needs.get-tests.outputs.models) }} | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|        group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
| @ -238,9 +239,9 @@ jobs: | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
| @ -291,7 +292,7 @@ jobs: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }} | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
| @ -337,9 +338,9 @@ jobs: | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
|  | ||||
							
								
								
									
										61
									
								
								.github/workflows/self-nightly-caller.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										61
									
								
								.github/workflows/self-nightly-caller.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,56 +1,43 @@ | ||||
| name: Nvidia CI with nightly torch | ||||
| name: Self-hosted runner (nightly-ci) | ||||
|  | ||||
|  | ||||
| on: | ||||
|   repository_dispatch: | ||||
|   # triggered when the daily scheduled Nvidia CI is completed. | ||||
|   # This way, we can compare the results more easily. | ||||
|   workflow_run: | ||||
|     workflows: ["Nvidia CI"] | ||||
|     branches: ["main"] | ||||
|     types: [completed] | ||||
|   schedule: | ||||
|     - cron: "17 2 * * *" | ||||
|   push: | ||||
|     branches: | ||||
|       - run_ci_with_nightly_torch* | ||||
|  | ||||
| # Used for `push` to easily modify the target workflow runs to compare against | ||||
| env: | ||||
|     prev_workflow_run_id: "" | ||||
|     other_workflow_run_id: "" | ||||
|  | ||||
|       - run_nightly_ci* | ||||
|  | ||||
| jobs: | ||||
|   build_nightly_torch_ci_images: | ||||
|     name: Build CI Docker Images with nightly torch | ||||
|   build_nightly_ci_images: | ||||
|     name: Build Nightly CI Docker Images | ||||
|     if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci')) | ||||
|     uses: ./.github/workflows/build-nightly-ci-docker-images.yml | ||||
|     with: | ||||
|       job: latest-with-torch-nightly-docker | ||||
|     secrets: inherit | ||||
|  | ||||
|   setup: | ||||
|     name: Setup | ||||
|     runs-on: ubuntu-22.04 | ||||
|     steps: | ||||
|       - name: Setup | ||||
|         run: | | ||||
|           mkdir "setup_values" | ||||
|           echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt" | ||||
|           echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt" | ||||
|  | ||||
|       - name: Upload artifacts | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: setup_values | ||||
|           path: setup_values | ||||
|  | ||||
|   model-ci: | ||||
|     name: Model CI | ||||
|     needs: build_nightly_torch_ci_images | ||||
|     needs: [build_nightly_ci_images] | ||||
|     uses: ./.github/workflows/self-scheduled.yml | ||||
|     with: | ||||
|       job: run_models_gpu | ||||
|       slack_report_channel: "#transformers-ci-past-future" | ||||
|       runner: ci | ||||
|       docker: huggingface/transformers-all-latest-torch-nightly-gpu | ||||
|       ci_event: Nightly CI | ||||
|       report_repo_id: hf-internal-testing/transformers_daily_ci_with_torch_nightly | ||||
|       commit_sha: ${{ github.event.workflow_run.head_sha || github.sha }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   deepspeed-ci: | ||||
|     name: DeepSpeed CI | ||||
|     needs: [build_nightly_ci_images] | ||||
|     uses: ./.github/workflows/self-scheduled.yml | ||||
|     with: | ||||
|       job: run_torch_cuda_extensions_gpu | ||||
|       slack_report_channel: "#transformers-ci-past-future" | ||||
|       runner: ci | ||||
|       # test deepspeed nightly build with the latest release torch | ||||
|       docker: huggingface/transformers-pytorch-deepspeed-latest-gpu | ||||
|       ci_event: Nightly CI | ||||
|       working-directory-prefix: /workspace | ||||
|     secrets: inherit | ||||
|  | ||||
							
								
								
									
										25
									
								
								.github/workflows/self-push-amd-mi300-caller.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								.github/workflows/self-push-amd-mi300-caller.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,25 @@ | ||||
| name: Self-hosted runner (AMD mi300 CI caller) | ||||
|  | ||||
| on: | ||||
|   #workflow_run: | ||||
|   #  workflows: ["Self-hosted runner (push-caller)"] | ||||
|   #  branches: ["main"] | ||||
|   #  types: [completed] | ||||
|   push: | ||||
|     branches: | ||||
|       - run_amd_push_ci_caller* | ||||
|     paths: | ||||
|       - "src/**" | ||||
|       - "tests/**" | ||||
|       - ".github/**" | ||||
|       - "templates/**" | ||||
|       - "utils/**" | ||||
|  | ||||
| jobs: | ||||
|   run_amd_ci: | ||||
|     name: AMD mi300 | ||||
|     if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) | ||||
|     uses: ./.github/workflows/self-push-amd.yml | ||||
|     with: | ||||
|       gpu_flavor: mi300 | ||||
|     secrets: inherit | ||||
							
								
								
									
										32
									
								
								.github/workflows/self-push.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										32
									
								
								.github/workflows/self-push.yml
									
									
									
									
										vendored
									
									
								
							| @ -31,12 +31,12 @@ jobs: | ||||
|     name: Setup | ||||
|     strategy: | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu-push-ci | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     outputs: | ||||
|       matrix: ${{ steps.set-matrix.outputs.matrix }} | ||||
|       test_map: ${{ steps.set-matrix.outputs.test_map }} | ||||
| @ -131,12 +131,12 @@ jobs: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(needs.setup.outputs.matrix) }} | ||||
|         machine_type: [aws-g5-4xlarge-cache] | ||||
|         machine_type: [aws-g4dn-2xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu-push-ci | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
| @ -169,9 +169,9 @@ jobs: | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
| @ -244,7 +244,7 @@ jobs: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(needs.setup.outputs.matrix) }} | ||||
|         machine_type: [aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
| @ -282,9 +282,9 @@ jobs: | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
| @ -357,12 +357,12 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache] | ||||
|         machine_type: [aws-g4dn-2xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     env: | ||||
|       # For the meaning of these environment variables, see the job `Setup` | ||||
|       CI_BRANCH_PUSH: ${{ github.event.ref }} | ||||
| @ -395,9 +395,9 @@ jobs: | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
| @ -467,7 +467,7 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
| @ -505,9 +505,9 @@ jobs: | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
|  | ||||
| @ -1,8 +1,8 @@ | ||||
| name: Self-hosted runner scale set (AMD mi325 scheduled CI caller) | ||||
| name: Self-hosted runner scale set (AMD mi300 scheduled CI caller) | ||||
| 
 | ||||
| # Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml | ||||
| # For example, 1gpu scale set: amd-mi325-ci-1gpu | ||||
| #              2gpu scale set: amd-mi325-ci-2gpu | ||||
| # For example, 1gpu scale set: amd-mi300-ci-1gpu | ||||
| #              2gpu scale set: amd-mi300-ci-2gpu | ||||
| 
 | ||||
| on: | ||||
|   workflow_run: | ||||
| @ -20,11 +20,10 @@ jobs: | ||||
|     with: | ||||
|       job: run_models_gpu | ||||
|       slack_report_channel: "#amd-hf-ci" | ||||
|       runner_group: amd-mi325 | ||||
|       runner_scale_set: amd-mi300-ci | ||||
|       docker: huggingface/transformers-pytorch-amd-gpu | ||||
|       ci_event: Scheduled CI (AMD) - mi325 | ||||
|       ci_event: Scheduled CI (AMD) - mi300 | ||||
|       report_repo_id: optimum-amd/transformers_daily_ci | ||||
|       env_file: /etc/podinfo/gha-gpu-isolation-settings | ||||
|     secrets: inherit | ||||
| 
 | ||||
|   torch-pipeline: | ||||
| @ -33,11 +32,10 @@ jobs: | ||||
|     with: | ||||
|       job: run_pipelines_torch_gpu | ||||
|       slack_report_channel: "#amd-hf-ci" | ||||
|       runner_group: amd-mi325 | ||||
|       runner_scale_set: amd-mi300-ci | ||||
|       docker: huggingface/transformers-pytorch-amd-gpu | ||||
|       ci_event: Scheduled CI (AMD) - mi325 | ||||
|       ci_event: Scheduled CI (AMD) - mi300 | ||||
|       report_repo_id: optimum-amd/transformers_daily_ci | ||||
|       env_file: /etc/podinfo/gha-gpu-isolation-settings | ||||
|     secrets: inherit | ||||
| 
 | ||||
|   example-ci: | ||||
| @ -46,11 +44,10 @@ jobs: | ||||
|     with: | ||||
|       job: run_examples_gpu | ||||
|       slack_report_channel: "#amd-hf-ci" | ||||
|       runner_group: amd-mi325 | ||||
|       runner_scale_set: amd-mi300-ci | ||||
|       docker: huggingface/transformers-pytorch-amd-gpu | ||||
|       ci_event: Scheduled CI (AMD) - mi325 | ||||
|       ci_event: Scheduled CI (AMD) - mi300 | ||||
|       report_repo_id: optimum-amd/transformers_daily_ci | ||||
|       env_file: /etc/podinfo/gha-gpu-isolation-settings | ||||
|     secrets: inherit | ||||
| 
 | ||||
|   deepspeed-ci: | ||||
| @ -59,9 +56,8 @@ jobs: | ||||
|     with: | ||||
|       job: run_torch_cuda_extensions_gpu | ||||
|       slack_report_channel: "#amd-hf-ci" | ||||
|       runner_group: amd-mi325 | ||||
|       runner_scale_set: amd-mi300-ci | ||||
|       docker: huggingface/transformers-pytorch-deepspeed-amd-gpu | ||||
|       ci_event: Scheduled CI (AMD) - mi325 | ||||
|       ci_event: Scheduled CI (AMD) - mi300 | ||||
|       report_repo_id: optimum-amd/transformers_daily_ci | ||||
|       env_file: /etc/podinfo/gha-gpu-isolation-settings | ||||
|     secrets: inherit | ||||
| @ -1,63 +0,0 @@ | ||||
| name: Self-hosted runner scale set (AMD mi355 scheduled CI caller) | ||||
|  | ||||
| # Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml | ||||
| # For example, 1gpu : amd-mi355-ci-1gpu | ||||
| #              2gpu : amd-mi355-ci-2gpu | ||||
|   | ||||
| on: | ||||
|   workflow_run: | ||||
|     workflows: ["Self-hosted runner (AMD scheduled CI caller)"] | ||||
|     branches: ["main"] | ||||
|     types: [completed] | ||||
|   push: | ||||
|     branches: | ||||
|       - run_amd_scheduled_ci_caller* | ||||
|  | ||||
| jobs: | ||||
|   model-ci: | ||||
|     name: Model CI | ||||
|     uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main | ||||
|     with: | ||||
|       job: run_models_gpu | ||||
|       slack_report_channel: "#amd-hf-ci" | ||||
|       runner_group: hfc-amd-mi355 | ||||
|       docker: huggingface/testing-rocm7.0-preview | ||||
|       ci_event: Scheduled CI (AMD) - mi355 | ||||
|       report_repo_id: hf-transformers-bot/transformers-ci-dummy | ||||
|     secrets: inherit | ||||
|  | ||||
|   torch-pipeline: | ||||
|     name: Torch pipeline CI | ||||
|     uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main | ||||
|     with: | ||||
|       job: run_pipelines_torch_gpu | ||||
|       slack_report_channel: "#amd-hf-ci" | ||||
|       runner_group: hfc-amd-mi355 | ||||
|       docker: huggingface/testing-rocm7.0-preview | ||||
|       ci_event: Scheduled CI (AMD) - mi355 | ||||
|       report_repo_id: hf-transformers-bot/transformers-ci-dummy | ||||
|     secrets: inherit | ||||
|  | ||||
|   example-ci: | ||||
|     name: Example CI | ||||
|     uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main | ||||
|     with: | ||||
|       job: run_examples_gpu | ||||
|       slack_report_channel: "#amd-hf-ci" | ||||
|       runner_group: hfc-amd-mi355 | ||||
|       docker: huggingface/testing-rocm7.0-preview | ||||
|       ci_event: Scheduled CI (AMD) - mi355 | ||||
|       report_repo_id: hf-transformers-bot/transformers-ci-dummy | ||||
|     secrets: inherit | ||||
|  | ||||
|   deepspeed-ci: | ||||
|     name: DeepSpeed CI | ||||
|     uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main | ||||
|     with:   | ||||
|       job: run_torch_cuda_extensions_gpu | ||||
|       slack_report_channel: "#amd-hf-ci" | ||||
|       runner_group: hfc-amd-mi355 | ||||
|       docker: huggingface/testing-rocm7.0-preview | ||||
|       ci_event: Scheduled CI (AMD) - mi355 | ||||
|       report_repo_id: hf-transformers-bot/transformers-ci-dummy | ||||
|     secrets: inherit | ||||
							
								
								
									
										13
									
								
								.github/workflows/self-scheduled-caller.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										13
									
								
								.github/workflows/self-scheduled-caller.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,4 +1,5 @@ | ||||
| name: Nvidia CI | ||||
| name: Self-hosted runner (scheduled) | ||||
|  | ||||
|  | ||||
| on: | ||||
|   repository_dispatch: | ||||
| @ -6,7 +7,7 @@ on: | ||||
|     - cron: "17 2 * * *" | ||||
|   push: | ||||
|     branches: | ||||
|       - run_nvidia_ci* | ||||
|       - run_scheduled_ci* | ||||
|   workflow_dispatch: | ||||
|     inputs: | ||||
|       prev_workflow_run_id: | ||||
| @ -52,9 +53,7 @@ jobs: | ||||
|       slack_report_channel: "#transformers-ci-daily-models" | ||||
|       docker: huggingface/transformers-all-latest-gpu | ||||
|       ci_event: Daily CI | ||||
|       runner_type: "a10" | ||||
|       report_repo_id: hf-internal-testing/transformers_daily_ci | ||||
|       commit_sha: ${{ github.sha }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   torch-pipeline: | ||||
| @ -66,7 +65,6 @@ jobs: | ||||
|       docker: huggingface/transformers-pytorch-gpu | ||||
|       ci_event: Daily CI | ||||
|       report_repo_id: hf-internal-testing/transformers_daily_ci | ||||
|       commit_sha: ${{ github.sha }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   example-ci: | ||||
| @ -78,7 +76,6 @@ jobs: | ||||
|       docker: huggingface/transformers-all-latest-gpu | ||||
|       ci_event: Daily CI | ||||
|       report_repo_id: hf-internal-testing/transformers_daily_ci | ||||
|       commit_sha: ${{ github.sha }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   trainer-fsdp-ci: | ||||
| @ -88,10 +85,8 @@ jobs: | ||||
|       job: run_trainer_and_fsdp_gpu | ||||
|       slack_report_channel: "#transformers-ci-daily-training" | ||||
|       docker: huggingface/transformers-all-latest-gpu | ||||
|       runner_type: "a10" | ||||
|       ci_event: Daily CI | ||||
|       report_repo_id: hf-internal-testing/transformers_daily_ci | ||||
|       commit_sha: ${{ github.sha }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   deepspeed-ci: | ||||
| @ -104,7 +99,6 @@ jobs: | ||||
|       ci_event: Daily CI | ||||
|       working-directory-prefix: /workspace | ||||
|       report_repo_id: hf-internal-testing/transformers_daily_ci | ||||
|       commit_sha: ${{ github.sha }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   quantization-ci: | ||||
| @ -116,5 +110,4 @@ jobs: | ||||
|       docker: huggingface/transformers-quantization-latest-gpu | ||||
|       ci_event: Daily CI | ||||
|       report_repo_id: hf-internal-testing/transformers_daily_ci | ||||
|       commit_sha: ${{ github.sha }} | ||||
|     secrets: inherit | ||||
|  | ||||
							
								
								
									
										36
									
								
								.github/workflows/self-scheduled-intel-gaudi.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										36
									
								
								.github/workflows/self-scheduled-intel-gaudi.yml
									
									
									
									
										vendored
									
									
								
							| @ -26,6 +26,7 @@ env: | ||||
|   TRANSFORMERS_IS_CI: yes | ||||
|   PT_ENABLE_INT64_SUPPORT: 1 | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   HF_HOME: /mnt/cache/.cache/huggingface | ||||
|  | ||||
| jobs: | ||||
| @ -83,6 +84,8 @@ jobs: | ||||
|       machine_type: ${{ matrix.machine_type }} | ||||
|       folder_slices: ${{ needs.setup.outputs.folder_slices }} | ||||
|       runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }} | ||||
|       report_name_prefix: run_models_gpu | ||||
|  | ||||
|     secrets: inherit | ||||
|  | ||||
|   run_trainer_and_fsdp_gpu: | ||||
| @ -101,10 +104,11 @@ jobs: | ||||
|       folder_slices: ${{ needs.setup.outputs.folder_slices }} | ||||
|       runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }} | ||||
|       report_name_prefix: run_trainer_and_fsdp_gpu | ||||
|  | ||||
|     secrets: inherit | ||||
|  | ||||
|   run_pipelines_torch_gpu: | ||||
|     if: ${{ inputs.job == 'run_pipelines_torch_gpu' }} | ||||
|   run_pipelines_gpu: | ||||
|     if: ${{ inputs.job == 'run_pipelines_gpu' }} | ||||
|     name: Pipelines | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
| @ -157,20 +161,20 @@ jobs: | ||||
|  | ||||
|       - name: Run all pipeline tests on Intel Gaudi | ||||
|         run: | | ||||
|           python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test" | ||||
|           python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_pipelines_gpu_test_reports tests/pipelines -m "not not_device_test" | ||||
|  | ||||
|       - name: Failure short reports | ||||
|         if: ${{ failure() }} | ||||
|         continue-on-error: true | ||||
|         run: | | ||||
|           cat reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt | ||||
|           cat reports/${{ env.machine_type }}_run_pipelines_gpu_test_reports/failures_short.txt | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports" | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_gpu_test_reports" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports | ||||
|           path: reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports | ||||
|           name: ${{ env.machine_type }}_run_pipelines_gpu_test_reports | ||||
|           path: reports/${{ env.machine_type }}_run_pipelines_gpu_test_reports | ||||
|  | ||||
|   run_examples_gpu: | ||||
|     if: ${{ inputs.job == 'run_examples_gpu' }} | ||||
| @ -244,8 +248,8 @@ jobs: | ||||
|           name: ${{ env.machine_type }}_run_examples_gpu_test_reports | ||||
|           path: reports/${{ env.machine_type }}_run_examples_gpu_test_reports | ||||
|  | ||||
|   run_torch_cuda_extensions_gpu: | ||||
|     if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} | ||||
|   run_deepspeed_gpu: | ||||
|     if: ${{ inputs.job == 'run_deepspeed_gpu' }} | ||||
|     name: Intel Gaudi deepspeed tests | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
| @ -301,20 +305,20 @@ jobs: | ||||
|  | ||||
|       - name: Run all deepspeed tests on intel Gaudi | ||||
|         run: | | ||||
|           python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed -m "not not_device_test" | ||||
|           python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_deepspeed_gpu_test_reports tests/deepspeed -m "not not_device_test" | ||||
|  | ||||
|       - name: Failure short reports | ||||
|         if: ${{ failure() }} | ||||
|         continue-on-error: true | ||||
|         run: | | ||||
|           cat reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt | ||||
|           cat reports/${{ env.machine_type }}_run_deepspeed_gpu_test_reports/failures_short.txt | ||||
|  | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" | ||||
|       - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_deepspeed_gpu_test_reports" | ||||
|         if: ${{ always() }} | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports | ||||
|           path: reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports | ||||
|           name: ${{ env.machine_type }}_run_deepspeed_gpu_test_reports | ||||
|           path: reports/${{ env.machine_type }}_run_deepspeed_gpu_test_reports | ||||
|  | ||||
|   send_results: | ||||
|     name: Slack Report | ||||
| @ -323,8 +327,8 @@ jobs: | ||||
|         setup, | ||||
|         run_models_gpu, | ||||
|         run_examples_gpu, | ||||
|         run_torch_cuda_extensions_gpu, | ||||
|         run_pipelines_torch_gpu, | ||||
|         run_pipelines_gpu, | ||||
|         run_deepspeed_gpu, | ||||
|         run_trainer_and_fsdp_gpu, | ||||
|       ] | ||||
|     if: ${{ always() }} | ||||
|  | ||||
| @ -23,7 +23,7 @@ jobs: | ||||
|     name: Pipeline CI | ||||
|     uses: ./.github/workflows/self-scheduled-intel-gaudi.yml | ||||
|     with: | ||||
|       job: run_pipelines_torch_gpu | ||||
|       job: run_pipelines_gpu | ||||
|       ci_event: Scheduled CI (Intel) - Gaudi3 | ||||
|       runner_scale_set: itac-bm-emr-gaudi3-dell | ||||
|       slack_report_channel: "#transformers-ci-daily-intel-gaudi3" | ||||
| @ -47,7 +47,7 @@ jobs: | ||||
|     name: DeepSpeed CI | ||||
|     uses: ./.github/workflows/self-scheduled-intel-gaudi.yml | ||||
|     with: | ||||
|       job: run_torch_cuda_extensions_gpu | ||||
|       job: run_deepspeed_gpu | ||||
|       ci_event: Scheduled CI (Intel) - Gaudi3 | ||||
|       runner_scale_set: itac-bm-emr-gaudi3-dell | ||||
|       slack_report_channel: "#transformers-ci-daily-intel-gaudi3" | ||||
|  | ||||
							
								
								
									
										77
									
								
								.github/workflows/self-scheduled.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										77
									
								
								.github/workflows/self-scheduled.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,4 +1,4 @@ | ||||
| name: Nvidia CI (job definitions) | ||||
| name: Self-hosted runner (scheduled) | ||||
|  | ||||
| # Note that each job's dependencies go into a corresponding docker file. | ||||
| # | ||||
| @ -28,16 +28,7 @@ on: | ||||
|       report_repo_id: | ||||
|         required: true | ||||
|         type: string | ||||
|       commit_sha: | ||||
|         required: false | ||||
|         type: string | ||||
|       runner_type: | ||||
|         required: false | ||||
|         type: string | ||||
|       models: | ||||
|         default: "" | ||||
|         required: false | ||||
|         type: string | ||||
|  | ||||
|  | ||||
| env: | ||||
|   HF_HOME: /mnt/cache | ||||
| @ -48,31 +39,33 @@ env: | ||||
|   # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. | ||||
|   # This token is created under the bot `hf-transformers-bot`. | ||||
|   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|   CUDA_VISIBLE_DEVICES: 0,1 | ||||
|   NUM_SLICES: 2 | ||||
|  | ||||
| jobs: | ||||
|   setup: | ||||
|     name: Setup | ||||
|     if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu", "run_quantization_torch_gpu"]'), inputs.job) | ||||
|     name: Setup | ||||
|     strategy: | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     outputs: | ||||
|       folder_slices: ${{ steps.set-matrix.outputs.folder_slices }} | ||||
|       slice_ids: ${{ steps.set-matrix.outputs.slice_ids }} | ||||
|       runner_map: ${{ steps.set-matrix.outputs.runner_map }} | ||||
|       quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }} | ||||
|     steps: | ||||
|       - name: Update clone | ||||
|         working-directory: /transformers | ||||
|         run: | | ||||
|           git fetch && git checkout ${{ inputs.commit_sha || github.sha }} | ||||
|           git fetch && git checkout ${{ github.sha }} | ||||
|  | ||||
|       - name: Cleanup | ||||
|         working-directory: /transformers | ||||
| @ -91,8 +84,9 @@ jobs: | ||||
|         working-directory: /transformers/tests | ||||
|         run: | | ||||
|           if [ "${{ inputs.job }}" = "run_models_gpu" ]; then | ||||
|             echo "folder_slices=$(python3 ../utils/split_model_tests.py --models '${{ inputs.models }}' --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT | ||||
|             echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT | ||||
|             echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT | ||||
|             echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT | ||||
|           elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then | ||||
|             echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT | ||||
|             echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT | ||||
| @ -116,17 +110,15 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [single-gpu, multi-gpu] | ||||
|         slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} | ||||
|     uses: ./.github/workflows/model_jobs.yml | ||||
|     with: | ||||
|       folder_slices: ${{ needs.setup.outputs.folder_slices }} | ||||
|       machine_type: ${{ matrix.machine_type }} | ||||
|       slice_id: ${{ matrix.slice_id }} | ||||
|       runner_map: ${{ needs.setup.outputs.runner_map }} | ||||
|       docker: ${{ inputs.docker }} | ||||
|       commit_sha: ${{ inputs.commit_sha || github.sha }} | ||||
|       runner_type: ${{ inputs.runner_type }} | ||||
|       report_repo_id: ${{ inputs.report_repo_id }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   run_trainer_and_fsdp_gpu: | ||||
| @ -136,7 +128,7 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache] | ||||
|         slice_id: [0, 1] | ||||
|     uses: ./.github/workflows/model_jobs.yml | ||||
|     with: | ||||
| @ -144,9 +136,6 @@ jobs: | ||||
|       machine_type: ${{ matrix.machine_type }} | ||||
|       slice_id: ${{ matrix.slice_id }} | ||||
|       docker: ${{ inputs.docker }} | ||||
|       commit_sha: ${{ inputs.commit_sha || github.sha }} | ||||
|       runner_type: ${{ inputs.runner_type }} | ||||
|       report_repo_id: ${{ inputs.report_repo_id }} | ||||
|       report_name_prefix: run_trainer_and_fsdp_gpu | ||||
|     secrets: inherit | ||||
|  | ||||
| @ -156,7 +145,7 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
| @ -165,7 +154,7 @@ jobs: | ||||
|     steps: | ||||
|       - name: Update clone | ||||
|         working-directory: /transformers | ||||
|         run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }} | ||||
|         run: git fetch && git checkout ${{ github.sha }} | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /transformers | ||||
| @ -190,9 +179,9 @@ jobs: | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
| @ -224,16 +213,16 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache] | ||||
|         machine_type: [aws-g4dn-4xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
|       image: huggingface/transformers-all-latest-gpu | ||||
|       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | ||||
|     steps: | ||||
|       - name: Update clone | ||||
|         working-directory: /transformers | ||||
|         run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }} | ||||
|         run: git fetch && git checkout ${{ github.sha }} | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /transformers | ||||
| @ -258,9 +247,9 @@ jobs: | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
| @ -293,7 +282,7 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
| @ -302,7 +291,7 @@ jobs: | ||||
|     steps: | ||||
|       - name: Update clone | ||||
|         working-directory: ${{ inputs.working-directory-prefix }}/transformers | ||||
|         run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }} | ||||
|         run: git fetch && git checkout ${{ github.sha }} | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: ${{ inputs.working-directory-prefix }}/transformers | ||||
| @ -355,9 +344,9 @@ jobs: | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
| @ -392,7 +381,7 @@ jobs: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }} | ||||
|         machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache] | ||||
|         machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache] | ||||
|     runs-on: | ||||
|       group: '${{ matrix.machine_type }}' | ||||
|     container: | ||||
| @ -410,7 +399,7 @@ jobs: | ||||
|  | ||||
|       - name: Update clone | ||||
|         working-directory: /transformers | ||||
|         run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }} | ||||
|         run: git fetch && git checkout ${{ github.sha }} | ||||
|  | ||||
|       - name: Reinstall transformers in edit mode (remove the one installed during docker image build) | ||||
|         working-directory: /transformers | ||||
| @ -435,9 +424,9 @@ jobs: | ||||
|         run: | | ||||
|           echo "${{ matrix.machine_type }}" | ||||
|  | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then | ||||
|           if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then | ||||
|             machine_type=single-gpu | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then | ||||
|           elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | ||||
|             machine_type=multi-gpu | ||||
|           else | ||||
|             machine_type=${{ matrix.machine_type }} | ||||
| @ -474,7 +463,6 @@ jobs: | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           fetch-depth: 2 | ||||
|           ref: ${{ inputs.commit_sha || github.sha }} | ||||
|  | ||||
|       - name: Install transformers | ||||
|         run: pip install transformers | ||||
| @ -517,7 +505,7 @@ jobs: | ||||
|       run_quantization_torch_gpu, | ||||
|       run_extract_warnings | ||||
|     ] | ||||
|     if: always() && !cancelled() | ||||
|     if: ${{ always() }} | ||||
|     uses: ./.github/workflows/slack-report.yml | ||||
|     with: | ||||
|       job: ${{ inputs.job }} | ||||
| @ -529,7 +517,6 @@ jobs: | ||||
|       quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} | ||||
|       ci_event: ${{ inputs.ci_event }} | ||||
|       report_repo_id: ${{ inputs.report_repo_id }} | ||||
|       commit_sha: ${{ inputs.commit_sha || github.sha }} | ||||
|  | ||||
|     secrets: inherit | ||||
|  | ||||
| @ -540,7 +527,7 @@ jobs: | ||||
|     uses: ./.github/workflows/check_failed_tests.yml | ||||
|     with: | ||||
|       docker: ${{ inputs.docker }} | ||||
|       start_sha: ${{ inputs.commit_sha || github.sha }} | ||||
|       start_sha: ${{ github.sha }} | ||||
|       job: ${{ inputs.job }} | ||||
|       slack_report_channel: ${{ inputs.slack_report_channel }} | ||||
|       ci_event: ${{ inputs.ci_event }} | ||||
|  | ||||
							
								
								
									
										16
									
								
								.github/workflows/slack-report.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								.github/workflows/slack-report.yml
									
									
									
									
										vendored
									
									
								
							| @ -24,10 +24,6 @@ on: | ||||
|       report_repo_id: | ||||
|         required: true | ||||
|         type: string | ||||
|       commit_sha: | ||||
|         required: false | ||||
|         type: string | ||||
|  | ||||
|  | ||||
| env: | ||||
|   TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }} | ||||
| @ -36,7 +32,7 @@ jobs: | ||||
|   send_results: | ||||
|     name: Send results to webhook | ||||
|     runs-on: ubuntu-22.04 | ||||
|     if: always() && !cancelled() | ||||
|     if: always() | ||||
|     steps: | ||||
|       - name: Preliminary job status | ||||
|         shell: bash | ||||
| @ -45,10 +41,6 @@ jobs: | ||||
|           echo "Setup status: ${{ inputs.setup_status }}" | ||||
|  | ||||
|       - uses: actions/checkout@v4 | ||||
|         with: | ||||
|           fetch-depth: 2 | ||||
|           ref: ${{ inputs.commit_sha || github.sha }} | ||||
|  | ||||
|       - uses: actions/download-artifact@v4 | ||||
|  | ||||
|       - name: Prepare some setup values | ||||
| @ -75,9 +67,7 @@ jobs: | ||||
|           SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} | ||||
|           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} | ||||
|           CI_EVENT: ${{ inputs.ci_event }} | ||||
|           # This `CI_TITLE` would be empty for `schedule` or `workflow_run` events. | ||||
|           CI_TITLE: ${{ github.event.head_commit.message }} | ||||
|           CI_SHA: ${{ inputs.commit_sha || github.sha }} | ||||
|           CI_SHA: ${{ github.sha }} | ||||
|           CI_TEST_JOB: ${{ inputs.job }} | ||||
|           SETUP_STATUS: ${{ inputs.setup_status }} | ||||
|           REPORT_REPO_ID: ${{ inputs.report_repo_id }} | ||||
| @ -93,7 +83,7 @@ jobs: | ||||
|             python utils/notification_service.py "${{ inputs.quantization_matrix }}" | ||||
|           else | ||||
|             python utils/notification_service.py "${{ inputs.folder_slices }}" | ||||
|           fi | ||||
|           fi           | ||||
|  | ||||
|       # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. | ||||
|       - name: Failure table artifacts | ||||
|  | ||||
							
								
								
									
										18
									
								
								.github/workflows/ssh-runner.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										18
									
								
								.github/workflows/ssh-runner.yml
									
									
									
									
										vendored
									
									
								
							| @ -20,6 +20,7 @@ env: | ||||
|   OMP_NUM_THREADS: 8 | ||||
|   MKL_NUM_THREADS: 8 | ||||
|   RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`. | ||||
|   SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | ||||
|   TF_FORCE_GPU_ALLOW_GROWTH: true | ||||
|   CUDA_VISIBLE_DEVICES: 0,1 | ||||
|  | ||||
| @ -32,17 +33,14 @@ jobs: | ||||
|     steps: | ||||
|       - name: Get runner to use | ||||
|         shell: bash | ||||
|         env: | ||||
|           NUM_GPUS: ${{ github.event.inputs.num_gpus }} | ||||
|           RUNNER_TYPE: ${{ github.event.inputs.runner_type }} | ||||
|         run: | | ||||
|           if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "t4" ]]; then | ||||
|           if [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then | ||||
|             echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV | ||||
|           elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "t4" ]]; then | ||||
|           elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then | ||||
|             echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV | ||||
|           elif [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then | ||||
|           elif [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then | ||||
|             echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV | ||||
|           elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "a10" ]]; then | ||||
|           elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then | ||||
|             echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV | ||||
|           else | ||||
|             echo "RUNNER=" >> $GITHUB_ENV | ||||
| @ -87,11 +85,9 @@ jobs: | ||||
|       - name: Store Slack infos | ||||
|         #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step | ||||
|         shell: bash | ||||
|         env: | ||||
|           GITHUB_ACTOR: ${{ github.actor }} | ||||
|         run: | | ||||
|           echo "$GITHUB_ACTOR" | ||||
|           github_actor=$GITHUB_ACTOR | ||||
|           echo "${{ github.actor }}" | ||||
|           github_actor=${{ github.actor }} | ||||
|           github_actor=${github_actor/'-'/'_'} | ||||
|           echo "$github_actor" | ||||
|           echo "github_actor=$github_actor" >> $GITHUB_ENV | ||||
|  | ||||
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -13,7 +13,6 @@ tests/fixtures/cached_*_text.txt | ||||
| logs/ | ||||
| lightning_logs/ | ||||
| lang_code_data/ | ||||
| reports/ | ||||
|  | ||||
| # Distribution / packaging | ||||
| .Python | ||||
| @ -168,6 +167,3 @@ tags | ||||
|  | ||||
| # ruff | ||||
| .ruff_cache | ||||
|  | ||||
| # modular conversion | ||||
| *.modular_backup | ||||
|  | ||||
| @ -68,7 +68,8 @@ already reported** (use the search bar on GitHub under Issues). Your issue shoul | ||||
|  | ||||
| Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it: | ||||
|  | ||||
| * Your **OS type and version** and **Python**, and **PyTorch** versions when applicable. | ||||
| * Your **OS type and version** and **Python**, **PyTorch** and | ||||
|   **TensorFlow** versions when applicable. | ||||
| * A short, self-contained, code snippet that allows us to reproduce the bug in | ||||
|   less than 30s. | ||||
| * The *full* traceback if an exception is raised. | ||||
| @ -164,7 +165,8 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main | ||||
|    mode with the `-e` flag. | ||||
|  | ||||
|    Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a | ||||
|    failure with this command. If that's the case, make sure to install PyTorch, then do: | ||||
|    failure with this command. If that's the case make sure to install the Deep Learning framework you are working with | ||||
|    (PyTorch, TensorFlow and/or Flax) then do: | ||||
|  | ||||
|    ```bash | ||||
|    pip install -e ".[quality]" | ||||
| @ -278,14 +280,13 @@ are working on it).<br> | ||||
| useful to avoid duplicated work, and to differentiate it from PRs ready to be merged.<br> | ||||
| ☐ Make sure existing tests pass.<br> | ||||
| ☐ If adding a new feature, also add tests for it.<br> | ||||
|  | ||||
| - If you are adding a new model, make sure you use | ||||
|    - If you are adding a new model, make sure you use | ||||
|      `ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)` to trigger the common tests. | ||||
| - If you are adding new `@slow` tests, make sure they pass using | ||||
|    - If you are adding new `@slow` tests, make sure they pass using | ||||
|      `RUN_SLOW=1 python -m pytest tests/models/my_new_model/test_my_new_model.py`. | ||||
| - If you are adding a new tokenizer, write tests and make sure | ||||
|    - If you are adding a new tokenizer, write tests and make sure | ||||
|      `RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py` passes. | ||||
| - CircleCI does not run the slow tests, but GitHub Actions does every night!<br> | ||||
|    - CircleCI does not run the slow tests, but GitHub Actions does every night!<br> | ||||
|  | ||||
| ☐ All public methods must have informative docstrings (see | ||||
| [`modeling_bert.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py) | ||||
| @ -341,7 +342,6 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t | ||||
| ``` | ||||
|  | ||||
| Like the slow tests, there are other environment variables available which are not enabled by default during testing: | ||||
|  | ||||
| - `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers. | ||||
|  | ||||
| More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py). | ||||
|  | ||||
| @ -38,6 +38,7 @@ In particular all "Please explain" questions or objectively very user-specific f | ||||
|  | ||||
| * "How to train T5 on De->En translation?" | ||||
|  | ||||
|  | ||||
| ## The GitHub Issues | ||||
|  | ||||
| Everything which hints at a bug should be opened as an [issue](https://github.com/huggingface/transformers/issues). | ||||
| @ -153,7 +154,7 @@ You are not required to read the following guidelines before opening an issue. H | ||||
|     cd examples/seq2seq | ||||
|     torchrun --nproc_per_node=2 ./finetune_trainer.py \ | ||||
|     --model_name_or_path sshleifer/distill-mbart-en-ro-12-4 --data_dir wmt_en_ro \ | ||||
|     --output_dir output_dir \ | ||||
|     --output_dir output_dir --overwrite_output_dir \ | ||||
|     --do_train --n_train 500 --num_train_epochs 1 \ | ||||
|     --per_device_train_batch_size 1  --freeze_embeds \ | ||||
|     --src_lang en_XX --tgt_lang ro_RO --task translation \ | ||||
| @ -246,6 +247,7 @@ You are not required to read the following guidelines before opening an issue. H | ||||
|  | ||||
|     Try not to use italics and bold text too much as these often make the text more difficult to read. | ||||
|  | ||||
|  | ||||
| 12. If you are cross-referencing a specific comment in a given thread or another issue, always link to that specific comment, rather than using the issue link. If you do the latter it could be quite impossible to find which specific comment you're referring to. | ||||
|  | ||||
|     To get the link to the specific comment do not copy the url from the location bar of your browser, but instead, click the `...` icon in the upper right corner of the comment and then select "Copy Link". | ||||
| @ -255,6 +257,7 @@ You are not required to read the following guidelines before opening an issue. H | ||||
|     1. https://github.com/huggingface/transformers/issues/9257 | ||||
|     2. https://github.com/huggingface/transformers/issues/9257#issuecomment-749945162 | ||||
|  | ||||
|  | ||||
| 13. If you are replying to the last comment, it's totally fine to make your reply with just your comment in it. The readers can follow the information flow here. | ||||
|  | ||||
|     But if you're replying to a comment that happened some comments back it's always a good practice to quote just the relevant lines you're replying to. The `>` is used for quoting, or you can always use the menu to do so. For example, your editor box will look like: | ||||
|  | ||||
							
								
								
									
										3
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								Makefile
									
									
									
									
									
								
							| @ -3,7 +3,7 @@ | ||||
| # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!) | ||||
| export PYTHONPATH = src | ||||
|  | ||||
| check_dirs := examples tests src utils scripts benchmark benchmark_v2 | ||||
| check_dirs := examples tests src utils | ||||
|  | ||||
| exclude_folders :=  "" | ||||
|  | ||||
| @ -52,7 +52,6 @@ repo-consistency: | ||||
| 	python utils/check_doctest_list.py | ||||
| 	python utils/update_metadata.py --check-only | ||||
| 	python utils/check_docstrings.py | ||||
| 	python utils/add_dates.py | ||||
|  | ||||
| # this target runs checks on all files | ||||
|  | ||||
|  | ||||
							
								
								
									
										28
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								README.md
									
									
									
									
									
								
							| @ -44,15 +44,13 @@ limitations under the License. | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_it.md">Italiano</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> | | ||||
|         <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_bn.md">বাংলা</a> | | ||||
|     </p> | ||||
| </h4> | ||||
|  | ||||
| @ -64,11 +62,12 @@ limitations under the License. | ||||
|     <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/transformers_as_a_model_definition.png"/> | ||||
| </h3> | ||||
|  | ||||
| Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer | ||||
| vision, audio, video, and multimodal models, for both inference and training. | ||||
|  | ||||
| It centralizes the model definition so that this definition is agreed upon across the ecosystem. `transformers` is the | ||||
| pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training | ||||
| Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer  | ||||
| vision, audio, video, and multimodal models, for both inference and training.  | ||||
|  | ||||
| It centralizes the model definition so that this definition is agreed upon across the ecosystem. `transformers` is the  | ||||
| pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training  | ||||
| frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...), | ||||
| and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from `transformers`. | ||||
|  | ||||
| @ -81,7 +80,7 @@ Explore the [Hub](https://huggingface.com/) today to find a model and use Transf | ||||
|  | ||||
| ## Installation | ||||
|  | ||||
| Transformers works with Python 3.9+, and [PyTorch](https://pytorch.org/get-started/locally/) 2.1+. | ||||
| Transformers works with Python 3.9+, [PyTorch](https://pytorch.org/get-started/locally/) 2.1+, [TensorFlow](https://www.tensorflow.org/install/pip) 2.6+, and [Flax](https://flax.readthedocs.io/en/latest/) 0.4.1+. | ||||
|  | ||||
| Create and activate a virtual environment with [venv](https://docs.python.org/3/library/venv.html) or [uv](https://docs.astral.sh/uv/), a fast Rust-based Python package and project manager. | ||||
|  | ||||
| @ -111,10 +110,10 @@ git clone https://github.com/huggingface/transformers.git | ||||
| cd transformers | ||||
|  | ||||
| # pip | ||||
| pip install '.[torch]' | ||||
| pip install .[torch] | ||||
|  | ||||
| # uv | ||||
| uv pip install '.[torch]' | ||||
| uv pip install .[torch] | ||||
| ``` | ||||
|  | ||||
| ## Quickstart | ||||
| @ -148,7 +147,7 @@ chat = [ | ||||
|     {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"} | ||||
| ] | ||||
|  | ||||
| pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", dtype=torch.bfloat16, device_map="auto") | ||||
| pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto") | ||||
| response = pipeline(chat, max_new_tokens=512) | ||||
| print(response[0]["generated_text"][-1]["content"]) | ||||
| ``` | ||||
| @ -194,6 +193,7 @@ pipeline("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.pn | ||||
| <details> | ||||
| <summary>Visual question answering</summary> | ||||
|  | ||||
|  | ||||
| <h3 align="center"> | ||||
|     <a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg"></a> | ||||
| </h3> | ||||
| @ -242,7 +242,7 @@ pipeline( | ||||
|  | ||||
| - This library is not a modular toolbox of building blocks for neural nets. The code in the model files is not refactored with additional abstractions on purpose, so that researchers can quickly iterate on each of the models without diving into additional abstractions/files. | ||||
| - The training API is optimized to work with PyTorch models provided by Transformers. For generic machine learning loops, you should use another library like [Accelerate](https://huggingface.co/docs/accelerate). | ||||
| - The [example scripts](https://github.com/huggingface/transformers/tree/main/examples) are only *examples*. They may not necessarily work out-of-the-box on your specific use case and you'll need to adapt the code for it to work. | ||||
| - The [example scripts](https://github.com/huggingface/transformers/tree/main/examples) are only *examples*. They may not necessarily work out-of-the-box on your specific use case and you'll need to adapt the code for it to work. | ||||
|  | ||||
| ## 100 projects using Transformers | ||||
|  | ||||
| @ -280,8 +280,8 @@ Expand each modality below to see a few example models for various use cases. | ||||
| - Automatic mask generation with [SAM](https://huggingface.co/facebook/sam-vit-base) | ||||
| - Depth estimation with [DepthPro](https://huggingface.co/apple/DepthPro-hf) | ||||
| - Image classification with [DINO v2](https://huggingface.co/facebook/dinov2-base) | ||||
| - Keypoint detection with [SuperPoint](https://huggingface.co/magic-leap-community/superpoint) | ||||
| - Keypoint matching with [SuperGlue](https://huggingface.co/magic-leap-community/superglue_outdoor) | ||||
| - Keypoint detection with [SuperGlue](https://huggingface.co/magic-leap-community/superglue_outdoor) | ||||
| - Keypoint matching with [SuperGlue](https://huggingface.co/magic-leap-community/superglue) | ||||
| - Object detection with [RT-DETRv2](https://huggingface.co/PekingU/rtdetr_v2_r50vd) | ||||
| - Pose Estimation with [VitPose](https://huggingface.co/usyd-community/vitpose-base-simple) | ||||
| - Universal segmentation with [OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_swin_large) | ||||
|  | ||||
| @ -14,7 +14,7 @@ Models uploaded on the Hugging Face Hub come in different formats. We heavily re | ||||
| models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized | ||||
| by the transformers library), as developed specifically to prevent arbitrary code execution on your system. | ||||
|  | ||||
| To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model. | ||||
| To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model. | ||||
|  | ||||
| ### Remote code | ||||
|  | ||||
|  | ||||
| @ -6,7 +6,7 @@ developers, researchers, students, professors, engineers, and anyone else to bui | ||||
|  | ||||
| In this list, we showcase incredibly impactful and novel projects that have pushed the field forward. We celebrate | ||||
| 100 of these projects as we reach the milestone of 100k stars as a community; but we're very open to pull requests | ||||
| adding other projects to the list. If you believe a project should be here and it's not, then please, open a PR | ||||
| adding other projects to the list. If you believe a project should be here and it's not, then please, open a PR  | ||||
| to add it. | ||||
|  | ||||
| ## [gpt4all](https://github.com/nomic-ai/gpt4all) | ||||
| @ -49,7 +49,7 @@ Keywords: LLMs, Large Language Models, Agents, Chains | ||||
|  | ||||
| [LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLMs with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results. | ||||
|  | ||||
| Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation | ||||
| Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation  | ||||
|  | ||||
| ## [ParlAI](https://github.com/facebookresearch/ParlAI) | ||||
|  | ||||
| @ -257,7 +257,7 @@ Stable-Dreamfusion is a pytorch implementation of the text-to-3D model Dreamfusi | ||||
| Keywords: Text-to-3D, Stable Diffusion | ||||
|  | ||||
| ## [txtai](https://github.com/neuml/txtai) | ||||
|  | ||||
|   | ||||
| [txtai](https://github.com/neuml/txtai) is an open-source platform for semantic search and workflows powered by language models. txtai builds embeddings databases, which are a union of vector indexes and relational databases enabling similarity search with SQL. Semantic workflows connect language models together into unified applications. | ||||
|  | ||||
| Keywords: Semantic search, LLM | ||||
| @ -288,7 +288,7 @@ Keywords: Music understanding, Music generation | ||||
|  | ||||
| ## [dalle-flow](https://github.com/jina-ai/dalle-flow) | ||||
|  | ||||
| DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt. | ||||
| DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt. | ||||
| The preferred candidate is fed to GLID-3 XL for diffusion, which often enriches the texture and background. Finally, the candidate is upscaled to 1024x1024 via SwinIR. | ||||
|  | ||||
| Keywords: High-definition image generation, Stable Diffusion, DALL-E Mega, GLID-3 XL, CLIP, SwinIR | ||||
| @ -309,8 +309,8 @@ Keywords: OCR, LaTeX, Math formula | ||||
|  | ||||
| OpenCLIP is an open source implementation of OpenAI's CLIP. | ||||
|  | ||||
| The goal of this repository is to enable training models with contrastive image-text supervision, and to investigate their properties such as robustness to distribution shift. | ||||
| The starting point is an implementation of CLIP that matches the accuracy of the original CLIP models when trained on the same dataset. | ||||
| The goal of this repository is to enable training models with contrastive image-text supervision, and to investigate their properties such as robustness to distribution shift.  | ||||
| The starting point is an implementation of CLIP that matches the accuracy of the original CLIP models when trained on the same dataset.  | ||||
|  | ||||
| Specifically, a ResNet-50 model trained with this codebase on OpenAI's 15 million image subset of YFCC achieves 32.7% top-1 accuracy on ImageNet. | ||||
|  | ||||
| @ -526,7 +526,7 @@ Keywords: Model deployment, CLoud, Mobile, Edge | ||||
|  | ||||
| ## [underthesea](https://github.com/undertheseanlp/underthesea) | ||||
|  | ||||
| [underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules, data sets, and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide an extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing. | ||||
| [underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules, data sets, and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide an extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing. | ||||
|  | ||||
| Keywords: Vietnamese, NLP | ||||
|  | ||||
| @ -596,7 +596,7 @@ Keywords: Data-Centric AI, Data Quality, Noisy Labels, Outlier Detection, Active | ||||
|  | ||||
| ## [BentoML](https://github.com/bentoml/BentoML) | ||||
|  | ||||
| [BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models. | ||||
| [BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models.  | ||||
| All Hugging Face models and pipelines can be seamlessly integrated into BentoML applications, enabling the running of models on the most suitable hardware and independent scaling based on usage. | ||||
|  | ||||
| Keywords: BentoML, Framework, Deployment, AI Applications | ||||
| @ -606,3 +606,4 @@ Keywords: BentoML, Framework, Deployment, AI Applications | ||||
| [LLaMA Factory](https://github.com/hiyouga/LLaMA-Factory) offers a user-friendly fine-tuning framework that incorporates PEFT. The repository includes training (fine-tuning) and inference examples for LLaMA-2, BLOOM, Falcon, Baichuan, Qwen, and other LLMs. A ChatGLM version is also available in [ChatGLM-Efficient-Tuning](https://github.com/hiyouga/ChatGLM-Efficient-Tuning). | ||||
|  | ||||
| Keywords: PEFT, fine-tuning, LLaMA-2, ChatGLM, Qwen | ||||
|  | ||||
|  | ||||
							
								
								
									
										1
									
								
								benchmark/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								benchmark/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1 +0,0 @@ | ||||
| benchmark_results/ | ||||
| @ -1,354 +0,0 @@ | ||||
| # Copyright 2025 The HuggingFace Team. All rights reserved. | ||||
| # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | ||||
| # You may obtain a copy of the License at | ||||
| # | ||||
| #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
| import os | ||||
| import sys | ||||
| from logging import Logger | ||||
| from threading import Event, Thread | ||||
| from time import perf_counter, sleep | ||||
| from typing import Optional | ||||
|  | ||||
|  | ||||
| # Add the parent directory to Python path to import benchmarks_entrypoint | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
| import gpustat | ||||
| import psutil | ||||
| import psycopg2 | ||||
| from benchmarks_entrypoint import MetricsRecorder | ||||
|  | ||||
|  | ||||
| # Optional heavy ML dependencies - only required when actually running the benchmark | ||||
| # TRANSFORMERS_AVAILABLE records whether both torch and transformers imported successfully; | ||||
| # run_benchmark checks it and returns early when it is False. | ||||
| try: | ||||
|     import torch | ||||
|  | ||||
|     from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache | ||||
|  | ||||
|     TRANSFORMERS_AVAILABLE = True | ||||
| except ImportError: | ||||
|     TRANSFORMERS_AVAILABLE = False | ||||
|     # Bind the names to None so they still exist at module level when the import fails. | ||||
|     torch = None | ||||
|     AutoModelForCausalLM = None | ||||
|     AutoTokenizer = None | ||||
|     GenerationConfig = None | ||||
|     StaticCache = None | ||||
|  | ||||
| # Process-wide environment settings applied at import time. | ||||
| # NOTE: TOKENIZERS_PARALLELISM is later overwritten with "false" inside run_benchmark. | ||||
| os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" | ||||
| os.environ["TOKENIZERS_PARALLELISM"] = "1" | ||||
|  | ||||
| # Only set torch precision if torch is available | ||||
| if TRANSFORMERS_AVAILABLE: | ||||
|     torch.set_float32_matmul_precision("high") | ||||
|  | ||||
|  | ||||
| def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder): | ||||
|     """Sample CPU, memory and GPU usage in a loop and hand each sample to the recorder. | ||||
|  | ||||
|     Used as the target of the background thread started by `run_benchmark`. | ||||
|  | ||||
|     Args: | ||||
|         benchmark_id: Identifier passed through unchanged to the recorder with every sample. | ||||
|         continue_metric_collection: Event-like object; despite its name it acts as a stop | ||||
|             signal, since the loop runs only while `is_set()` is False. | ||||
|         metrics_recorder: Object exposing `collect_device_measurements(benchmark_id, | ||||
|             cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes)`. | ||||
|     """ | ||||
|     # Measure the current process only, not the whole machine. | ||||
|     p = psutil.Process(os.getpid()) | ||||
|     while not continue_metric_collection.is_set(): | ||||
|         # oneshot() groups the two process queries below into a single context. | ||||
|         with p.oneshot(): | ||||
|             cpu_util = p.cpu_percent() | ||||
|             # Resident set size converted from bytes to MiB. | ||||
|             mem_megabytes = p.memory_info().rss / (1024 * 1024) | ||||
|         gpu_stats = gpustat.GPUStatCollection.new_query() | ||||
|         # Only the first GPU in the query result (index 0) is sampled. | ||||
|         gpu_util = gpu_stats[0]["utilization.gpu"] | ||||
|         gpu_mem_megabytes = gpu_stats[0]["memory.used"]  # presumably MB, as the variable name suggests - TODO confirm | ||||
|         metrics_recorder.collect_device_measurements( | ||||
|             benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes | ||||
|         ) | ||||
|         # Pause 10 ms between samples. | ||||
|         sleep(0.01) | ||||
|  | ||||
|  | ||||
| def run_benchmark( | ||||
|     logger: Logger, | ||||
|     repository: str, | ||||
|     branch: str, | ||||
|     commit_id: str, | ||||
|     commit_msg: str, | ||||
|     metrics_recorder=None, | ||||
|     num_tokens_to_generate=100, | ||||
| ): | ||||
|     # Check if required ML dependencies are available | ||||
|     if not TRANSFORMERS_AVAILABLE: | ||||
|         logger.error("Transformers and torch are required to run the LLaMA benchmark. Please install them with:") | ||||
|         logger.error("pip install torch transformers") | ||||
|         logger.error("Skipping LLaMA benchmark due to missing dependencies.") | ||||
|         return | ||||
|  | ||||
|     continue_metric_collection = Event() | ||||
|     metrics_thread = None | ||||
|     model_id = "meta-llama/Llama-2-7b-hf" | ||||
|  | ||||
|     # If no metrics_recorder is provided, create one for backward compatibility | ||||
|     if metrics_recorder is None: | ||||
|         try: | ||||
|             metrics_recorder = MetricsRecorder( | ||||
|                 psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True | ||||
|             ) | ||||
|             should_close_recorder = True | ||||
|         except Exception as e: | ||||
|             logger.error(f"Failed to create metrics recorder: {e}") | ||||
|             return | ||||
|     else: | ||||
|         should_close_recorder = False | ||||
|     try: | ||||
|         gpu_stats = gpustat.GPUStatCollection.new_query() | ||||
|         gpu_name = gpu_stats[0]["name"] | ||||
|         benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id}) | ||||
|         logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}") | ||||
|         metrics_thread = Thread( | ||||
|             target=collect_metrics, | ||||
|             args=[benchmark_id, continue_metric_collection, metrics_recorder], | ||||
|         ) | ||||
|         metrics_thread.start() | ||||
|         logger.info("started background thread to fetch device metrics") | ||||
|  | ||||
|         os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling | ||||
|  | ||||
|         device = "cuda" | ||||
|  | ||||
|         logger.info("downloading weights") | ||||
|         # This is to avoid counting download in model load time measurement | ||||
|         model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16) | ||||
|         gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1) | ||||
|         logger.info("loading model") | ||||
|         start = perf_counter() | ||||
|         model = AutoModelForCausalLM.from_pretrained( | ||||
|             model_id, dtype=torch.float16, generation_config=gen_config | ||||
|         ).eval() | ||||
|         model.to(device) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         model_load_time = end - start | ||||
|         logger.info(f"loaded model in: {model_load_time}s") | ||||
|  | ||||
|         tokenizer = AutoTokenizer.from_pretrained(model_id) | ||||
|  | ||||
|         prompt = "Why dogs are so cute?" | ||||
|         inputs = tokenizer(prompt, return_tensors="pt").to(device) | ||||
|  | ||||
|         # Specify the max length (including both the prompt and the response) | ||||
|         # When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object | ||||
|         # with sequence length = `max_length`. The longer the more you will re-use it | ||||
|         seq_length = inputs["input_ids"].shape[1] | ||||
|         model.generation_config.max_length = seq_length + num_tokens_to_generate | ||||
|         batch_size = inputs["input_ids"].shape[0] | ||||
|  | ||||
|         # Copied from the gpt-fast repo | ||||
|         def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a cuda synchronization | ||||
|             q = torch.empty_like(probs_sort).exponential_(1) | ||||
|             return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int) | ||||
|  | ||||
|         def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None): | ||||
|             logits = logits / max(temperature, 1e-5) | ||||
|  | ||||
|             if top_k is not None: | ||||
|                 v, _ = torch.topk(logits, min(top_k, logits.size(-1))) | ||||
|                 pivot = v.select(-1, -1).unsqueeze(-1) | ||||
|                 logits = torch.where(logits < pivot, -float("Inf"), logits) | ||||
|             probs = torch.nn.functional.softmax(logits, dim=-1) | ||||
|             return probs | ||||
|  | ||||
|         def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None): | ||||
|             probs = logits_to_probs(logits[0, -1], temperature, top_k) | ||||
|             idx_next = multinomial_sample_one_no_sync(probs) | ||||
|             return idx_next, probs | ||||
|  | ||||
|         # First eager forward pass | ||||
|         logger.info("running first eager forward pass") | ||||
|         start = perf_counter() | ||||
|         _ = model(**inputs) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         first_eager_fwd_pass_time = end - start | ||||
|         logger.info(f"completed first eager forward pass in: {first_eager_fwd_pass_time}s") | ||||
|  | ||||
|         # Second eager forward pass (should be faster) | ||||
|         logger.info("running second eager forward pass") | ||||
|         start = perf_counter() | ||||
|         _ = model(**inputs) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         second_eager_fwd_pass_time = end - start | ||||
|         logger.info(f"completed second eager forward pass in: {second_eager_fwd_pass_time}s") | ||||
|  | ||||
|         # First eager generation | ||||
|         logger.info("running first eager generation") | ||||
|         start = perf_counter() | ||||
|         output = model.generate(**inputs) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         first_eager_generate_time = end - start | ||||
|         logger.info(f"completed first eager generation in: {first_eager_generate_time}s") | ||||
|         logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|         # Second eager generation (should be faster) | ||||
|         logger.info("running second eager generation") | ||||
|         start = perf_counter() | ||||
|         output = model.generate(**inputs) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         second_eager_generate_time = end - start | ||||
|         logger.info(f"completed second eager generation in: {second_eager_generate_time}s") | ||||
|         logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|         logger.info("running generation timing loop") | ||||
|  | ||||
|         input_pos = torch.arange(0, seq_length, device=device) | ||||
|         inputs = inputs["input_ids"] | ||||
|  | ||||
|         start = perf_counter() | ||||
|         with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH): | ||||
|             logits = model(inputs, position_ids=input_pos).logits | ||||
|         next_token, probs = sample(logits, temperature=0.6, top_k=5) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         time_to_first_token = end - start | ||||
|  | ||||
|         input_pos = torch.tensor([seq_length], device=device, dtype=torch.int) | ||||
|         next_token = next_token.clone() | ||||
|         start = perf_counter() | ||||
|         with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH): | ||||
|             logits = model(next_token, position_ids=input_pos).logits | ||||
|         next_token, probs = sample(logits, temperature=0.6, top_k=5) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         time_to_second_token = end - start | ||||
|  | ||||
|         input_pos = torch.tensor([seq_length + 1], device=device, dtype=torch.int) | ||||
|         next_token = next_token.clone() | ||||
|         start = perf_counter() | ||||
|         with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH): | ||||
|             logits = model(next_token, position_ids=input_pos).logits | ||||
|         next_token, probs = sample(logits, temperature=0.6, top_k=5) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         time_to_third_token = end - start | ||||
|  | ||||
|         logger.info("running longer generation timing loop") | ||||
|  | ||||
|         total_time = 0 | ||||
|         for i in range(20): | ||||
|             input_pos = torch.tensor([seq_length + 2 + i], device=device, dtype=torch.int) | ||||
|             next_token = next_token.clone() | ||||
|             start = perf_counter() | ||||
|             with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH): | ||||
|                 logits = model(next_token, position_ids=input_pos).logits | ||||
|             next_token, probs = sample(logits, temperature=0.6, top_k=5) | ||||
|             torch.cuda.synchronize() | ||||
|             end = perf_counter() | ||||
|             total_time += end - start | ||||
|  | ||||
|         mean_time_to_next_token = total_time / 20 | ||||
|  | ||||
|         logger.info("running compilation benchmarks") | ||||
|  | ||||
|         # Now compile the model | ||||
|         model = torch.compile(model, mode="max-autotune", fullgraph=True) | ||||
|  | ||||
|         # StaticCache for generation | ||||
|         with torch.device(device): | ||||
|             model.setup_caches(max_batch_size=batch_size, max_seq_len=seq_length + num_tokens_to_generate) | ||||
|  | ||||
|         input_pos = torch.arange(0, seq_length, device=device) | ||||
|         inputs = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"] | ||||
|  | ||||
|         logger.info("compiling model") | ||||
|  | ||||
|         model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16, generation_config=gen_config) | ||||
|         model.to(device) | ||||
|         model = torch.compile(model, mode="max-autotune", fullgraph=True) | ||||
|  | ||||
|         past_key_values = StaticCache( | ||||
|             model.config, | ||||
|             max_batch_size=batch_size, | ||||
|             device=device, | ||||
|             dtype=torch.float16, | ||||
|             max_cache_len=seq_length + 128, | ||||
|         ) | ||||
|         # 1st call | ||||
|         start = perf_counter() | ||||
|         output = model.generate(**inputs, past_key_values=past_key_values) | ||||
|         end = perf_counter() | ||||
|         first_compile_generate_time = end - start | ||||
|         logger.info(f"completed first compile generation in: {first_compile_generate_time}s") | ||||
|         logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|         past_key_values = StaticCache( | ||||
|             model.config, | ||||
|             max_batch_size=batch_size, | ||||
|             device=device, | ||||
|             dtype=torch.float16, | ||||
|             max_cache_len=seq_length + 128, | ||||
|         ) | ||||
|         # 2nd call | ||||
|         start = perf_counter() | ||||
|         output = model.generate(**inputs, past_key_values=past_key_values) | ||||
|         end = perf_counter() | ||||
|         second_compile_generate_time = end - start | ||||
|         logger.info(f"completed second compile generation in: {second_compile_generate_time}s") | ||||
|         logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|         past_key_values = StaticCache( | ||||
|             model.config, | ||||
|             max_batch_size=batch_size, | ||||
|             device=device, | ||||
|             dtype=torch.float16, | ||||
|             max_cache_len=seq_length + 128, | ||||
|         ) | ||||
|         # 3rd call | ||||
|         start = perf_counter() | ||||
|         output = model.generate(**inputs, past_key_values=past_key_values) | ||||
|         end = perf_counter() | ||||
|         third_compile_generate_time = end - start | ||||
|         logger.info(f"completed third compile generation in: {third_compile_generate_time}s") | ||||
|         logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|         past_key_values = StaticCache( | ||||
|             model.config, | ||||
|             max_batch_size=batch_size, | ||||
|             device=device, | ||||
|             dtype=torch.float16, | ||||
|             max_cache_len=seq_length + 128, | ||||
|         ) | ||||
|         # 4th call | ||||
|         start = perf_counter() | ||||
|         output = model.generate(**inputs, past_key_values=past_key_values) | ||||
|         end = perf_counter() | ||||
|         fourth_compile_generate_time = end - start | ||||
|         logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s") | ||||
|         logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|         metrics_recorder.collect_model_measurements( | ||||
|             benchmark_id, | ||||
|             { | ||||
|                 "model_load_time": model_load_time, | ||||
|                 "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time, | ||||
|                 "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time, | ||||
|                 "first_eager_generate_time_secs": first_eager_generate_time, | ||||
|                 "second_eager_generate_time_secs": second_eager_generate_time, | ||||
|                 "time_to_first_token_secs": time_to_first_token, | ||||
|                 "time_to_second_token_secs": time_to_second_token, | ||||
|                 "time_to_third_token_secs": time_to_third_token, | ||||
|                 "time_to_next_token_mean_secs": mean_time_to_next_token, | ||||
|                 "first_compile_generate_time_secs": first_compile_generate_time, | ||||
|                 "second_compile_generate_time_secs": second_compile_generate_time, | ||||
|                 "third_compile_generate_time_secs": third_compile_generate_time, | ||||
|                 "fourth_compile_generate_time_secs": fourth_compile_generate_time, | ||||
|             }, | ||||
|         ) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Caught exception: {e}") | ||||
|     continue_metric_collection.set() | ||||
|     if metrics_thread is not None: | ||||
|         metrics_thread.join() | ||||
|  | ||||
|     # Only close the recorder if we created it locally | ||||
|     if should_close_recorder: | ||||
|         metrics_recorder.close() | ||||
| @ -31,7 +31,9 @@ from contextlib import contextmanager | ||||
| from pathlib import Path | ||||
|  | ||||
| from git import Repo | ||||
|  | ||||
| from huggingface_hub import HfApi | ||||
|  | ||||
| from optimum_benchmark import Benchmark | ||||
| from optimum_benchmark_wrapper import main | ||||
|  | ||||
|  | ||||
| @ -1,36 +1,15 @@ | ||||
| # Copyright 2025 The HuggingFace Team. All rights reserved. | ||||
| # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | ||||
| # You may obtain a copy of the License at | ||||
| # | ||||
| #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
| import argparse | ||||
| import importlib.util | ||||
| import json | ||||
| import logging | ||||
| import os | ||||
| import sys | ||||
| import uuid | ||||
| from datetime import datetime | ||||
| from typing import Dict, Tuple | ||||
|  | ||||
| import pandas as pd | ||||
| from psycopg2.extensions import register_adapter | ||||
| from psycopg2.extras import Json | ||||
|  | ||||
|  | ||||
| try: | ||||
|     from psycopg2.extensions import register_adapter | ||||
|     from psycopg2.extras import Json | ||||
|  | ||||
|     register_adapter(dict, Json) | ||||
|     PSYCOPG2_AVAILABLE = True | ||||
| except ImportError: | ||||
|     PSYCOPG2_AVAILABLE = False | ||||
| register_adapter(dict, Json) | ||||
|  | ||||
|  | ||||
| class ImportModuleException(Exception): | ||||
| @ -39,272 +18,61 @@ class ImportModuleException(Exception): | ||||
|  | ||||
| class MetricsRecorder: | ||||
|     def __init__( | ||||
|         self, | ||||
|         connection, | ||||
|         logger: logging.Logger, | ||||
|         repository: str, | ||||
|         branch: str, | ||||
|         commit_id: str, | ||||
|         commit_msg: str, | ||||
|         collect_csv_data: bool = True, | ||||
|         self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str | ||||
|     ): | ||||
|         self.conn = connection | ||||
|         self.use_database = connection is not None | ||||
|         if self.use_database: | ||||
|             self.conn.autocommit = True | ||||
|         self.conn.autocommit = True | ||||
|         self.logger = logger | ||||
|         self.repository = repository | ||||
|         self.branch = branch | ||||
|         self.commit_id = commit_id | ||||
|         self.commit_msg = commit_msg | ||||
|         self.collect_csv_data = collect_csv_data | ||||
|  | ||||
|         # For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled) | ||||
|         if self.collect_csv_data: | ||||
|             # Initialize empty DataFrames with proper schemas | ||||
|             self.benchmarks_df = pd.DataFrame( | ||||
|                 columns=[ | ||||
|                     "benchmark_id", | ||||
|                     "repository", | ||||
|                     "branch", | ||||
|                     "commit_id", | ||||
|                     "commit_message", | ||||
|                     "metadata", | ||||
|                     "created_at", | ||||
|                 ] | ||||
|             ) | ||||
|             self.device_measurements_df = pd.DataFrame( | ||||
|                 columns=["benchmark_id", "cpu_util", "mem_megabytes", "gpu_util", "gpu_mem_megabytes", "time"] | ||||
|             ) | ||||
|             self.model_measurements_df = pd.DataFrame( | ||||
|                 columns=[ | ||||
|                     "benchmark_id", | ||||
|                     "time", | ||||
|                     "model_load_time", | ||||
|                     "first_eager_forward_pass_time_secs", | ||||
|                     "second_eager_forward_pass_time_secs", | ||||
|                     "first_eager_generate_time_secs", | ||||
|                     "second_eager_generate_time_secs", | ||||
|                     "time_to_first_token_secs", | ||||
|                     "time_to_second_token_secs", | ||||
|                     "time_to_third_token_secs", | ||||
|                     "time_to_next_token_mean_secs", | ||||
|                     "first_compile_generate_time_secs", | ||||
|                     "second_compile_generate_time_secs", | ||||
|                     "third_compile_generate_time_secs", | ||||
|                     "fourth_compile_generate_time_secs", | ||||
|                 ] | ||||
|             ) | ||||
|         else: | ||||
|             self.benchmarks_df = None | ||||
|             self.device_measurements_df = None | ||||
|             self.model_measurements_df = None | ||||
|  | ||||
|     def initialise_benchmark(self, metadata: dict[str, str]) -> str: | ||||
|     def initialise_benchmark(self, metadata: dict[str, str]) -> int: | ||||
|         """ | ||||
|         Creates a new benchmark, returns the benchmark id (UUID) | ||||
|         Creates a new benchmark, returns the benchmark id | ||||
|         """ | ||||
|         # Generate a unique UUID for this benchmark | ||||
|         benchmark_id = str(uuid.uuid4()) | ||||
|  | ||||
|         if self.use_database: | ||||
|             with self.conn.cursor() as cur: | ||||
|                 cur.execute( | ||||
|                     "INSERT INTO benchmarks (benchmark_id, repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s, %s)", | ||||
|                     (benchmark_id, self.repository, self.branch, self.commit_id, self.commit_msg, metadata), | ||||
|                 ) | ||||
|                 self.logger.debug(f"initialised benchmark #{benchmark_id}") | ||||
|  | ||||
|         # Store benchmark data for CSV export (if enabled) | ||||
|         if self.collect_csv_data: | ||||
|             # Add row to pandas DataFrame | ||||
|             new_row = pd.DataFrame( | ||||
|                 [ | ||||
|                     { | ||||
|                         "benchmark_id": benchmark_id, | ||||
|                         "repository": self.repository, | ||||
|                         "branch": self.branch, | ||||
|                         "commit_id": self.commit_id, | ||||
|                         "commit_message": self.commit_msg, | ||||
|                         "metadata": json.dumps(metadata), | ||||
|                         "created_at": datetime.utcnow().isoformat(), | ||||
|                     } | ||||
|                 ] | ||||
|         # gpu_name: str, model_id: str | ||||
|         with self.conn.cursor() as cur: | ||||
|             cur.execute( | ||||
|                 "INSERT INTO benchmarks (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id", | ||||
|                 (self.repository, self.branch, self.commit_id, self.commit_msg, metadata), | ||||
|             ) | ||||
|             self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True) | ||||
|             benchmark_id = cur.fetchone()[0] | ||||
|             logger.debug(f"initialised benchmark #{benchmark_id}") | ||||
|             return benchmark_id | ||||
|  | ||||
|         mode_info = [] | ||||
|         if self.use_database: | ||||
|             mode_info.append("database") | ||||
|         if self.collect_csv_data: | ||||
|             mode_info.append("CSV") | ||||
|         mode_str = " + ".join(mode_info) if mode_info else "no storage" | ||||
|  | ||||
|         self.logger.debug(f"initialised benchmark #{benchmark_id} ({mode_str} mode)") | ||||
|         return benchmark_id | ||||
|  | ||||
|     def collect_device_measurements(self, benchmark_id: str, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes): | ||||
|     def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes): | ||||
|         """ | ||||
|         Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function. | ||||
|         """ | ||||
|         # Store device measurements for CSV export (if enabled) | ||||
|         if self.collect_csv_data: | ||||
|             # Add row to pandas DataFrame | ||||
|             new_row = pd.DataFrame( | ||||
|                 [ | ||||
|                     { | ||||
|                         "benchmark_id": benchmark_id, | ||||
|                         "cpu_util": cpu_util, | ||||
|                         "mem_megabytes": mem_megabytes, | ||||
|                         "gpu_util": gpu_util, | ||||
|                         "gpu_mem_megabytes": gpu_mem_megabytes, | ||||
|                         "time": datetime.utcnow().isoformat(), | ||||
|                     } | ||||
|                 ] | ||||
|         with self.conn.cursor() as cur: | ||||
|             cur.execute( | ||||
|                 "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)", | ||||
|                 (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes), | ||||
|             ) | ||||
|             self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True) | ||||
|  | ||||
|         # Store in database if available | ||||
|         if self.use_database: | ||||
|             with self.conn.cursor() as cur: | ||||
|                 cur.execute( | ||||
|                     "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)", | ||||
|                     (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes), | ||||
|                 ) | ||||
|  | ||||
|         self.logger.debug( | ||||
|             f"collected device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]" | ||||
|             f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]" | ||||
|         ) | ||||
|  | ||||
|     def collect_model_measurements(self, benchmark_id: str, measurements: dict[str, float]): | ||||
|         # Store model measurements for CSV export (if enabled) | ||||
|         if self.collect_csv_data: | ||||
|             # Add row to pandas DataFrame with flattened measurements | ||||
|             row_data = {"benchmark_id": benchmark_id, "time": datetime.utcnow().isoformat()} | ||||
|             # Flatten the measurements dict into the row | ||||
|             row_data.update(measurements) | ||||
|  | ||||
|             new_row = pd.DataFrame([row_data]) | ||||
|             self.model_measurements_df = pd.concat([self.model_measurements_df, new_row], ignore_index=True) | ||||
|  | ||||
|         # Store in database if available | ||||
|         if self.use_database: | ||||
|             with self.conn.cursor() as cur: | ||||
|                 cur.execute( | ||||
|                     """ | ||||
|                     INSERT INTO model_measurements ( | ||||
|                         benchmark_id, | ||||
|                         measurements | ||||
|                     ) VALUES (%s, %s) | ||||
|                     """, | ||||
|                     ( | ||||
|                         benchmark_id, | ||||
|                         measurements, | ||||
|                     ), | ||||
|                 ) | ||||
|  | ||||
|         self.logger.debug(f"collected model measurements for benchmark #{benchmark_id}: {measurements}") | ||||
|  | ||||
|     def export_to_csv(self, output_dir: str = "benchmark_results"): | ||||
|         """ | ||||
|         Export all collected data to CSV files using pandas DataFrames | ||||
|         """ | ||||
|         if not self.collect_csv_data: | ||||
|             self.logger.warning("CSV data collection is disabled - no CSV files will be generated") | ||||
|             return | ||||
|  | ||||
|         if not os.path.exists(output_dir): | ||||
|             os.makedirs(output_dir) | ||||
|             self.logger.info(f"Created output directory: {output_dir}") | ||||
|  | ||||
|         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") | ||||
|         files_created = [] | ||||
|  | ||||
|         # Export using pandas DataFrames | ||||
|         self._export_pandas_data(output_dir, timestamp, files_created) | ||||
|  | ||||
|         self.logger.info(f"CSV export complete! Created {len(files_created)} files in {output_dir}") | ||||
|  | ||||
|     def _export_pandas_data(self, output_dir: str, timestamp: str, files_created: list): | ||||
|         """ | ||||
|         Export CSV files using pandas DataFrames | ||||
|         """ | ||||
|         # Export benchmarks | ||||
|         benchmarks_file = os.path.join(output_dir, f"benchmarks_{timestamp}.csv") | ||||
|         self.benchmarks_df.to_csv(benchmarks_file, index=False) | ||||
|         files_created.append(benchmarks_file) | ||||
|         self.logger.info(f"Exported {len(self.benchmarks_df)} benchmark records to {benchmarks_file}") | ||||
|  | ||||
|         # Export device measurements | ||||
|         device_file = os.path.join(output_dir, f"device_measurements_{timestamp}.csv") | ||||
|         self.device_measurements_df.to_csv(device_file, index=False) | ||||
|         files_created.append(device_file) | ||||
|         self.logger.info(f"Exported {len(self.device_measurements_df)} device measurement records to {device_file}") | ||||
|  | ||||
|         # Export model measurements (already flattened) | ||||
|         model_file = os.path.join(output_dir, f"model_measurements_{timestamp}.csv") | ||||
|         self.model_measurements_df.to_csv(model_file, index=False) | ||||
|         files_created.append(model_file) | ||||
|         self.logger.info(f"Exported {len(self.model_measurements_df)} model measurement records to {model_file}") | ||||
|  | ||||
|         # Create comprehensive summary using pandas operations | ||||
|         summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.csv") | ||||
|         self._create_summary(summary_file) | ||||
|         files_created.append(summary_file) | ||||
|  | ||||
|     def _create_summary(self, summary_file: str): | ||||
|         """ | ||||
|         Create a comprehensive summary CSV using pandas operations | ||||
|         """ | ||||
|         if len(self.benchmarks_df) == 0: | ||||
|             # Create empty summary file | ||||
|             summary_df = pd.DataFrame() | ||||
|             summary_df.to_csv(summary_file, index=False) | ||||
|             self.logger.info(f"Created empty benchmark summary at {summary_file}") | ||||
|             return | ||||
|  | ||||
|         # Start with benchmarks as the base | ||||
|         summary_df = self.benchmarks_df.copy() | ||||
|  | ||||
|         # Add model measurements (join on benchmark_id) | ||||
|         if len(self.model_measurements_df) > 0: | ||||
|             # Drop 'time' column from model measurements to avoid conflicts | ||||
|             model_df = self.model_measurements_df.drop(columns=["time"], errors="ignore") | ||||
|             summary_df = summary_df.merge(model_df, on="benchmark_id", how="left") | ||||
|  | ||||
|         # Calculate device measurement aggregates using pandas groupby | ||||
|         if len(self.device_measurements_df) > 0: | ||||
|             device_agg = ( | ||||
|                 self.device_measurements_df.groupby("benchmark_id") | ||||
|                 .agg( | ||||
|                     { | ||||
|                         "cpu_util": ["mean", "max", "std", "count"], | ||||
|                         "mem_megabytes": ["mean", "max", "std"], | ||||
|                         "gpu_util": ["mean", "max", "std"], | ||||
|                         "gpu_mem_megabytes": ["mean", "max", "std"], | ||||
|                     } | ||||
|                 ) | ||||
|                 .round(3) | ||||
|     def collect_model_measurements(self, benchmark_id: int, measurements: dict[str, float]): | ||||
|         with self.conn.cursor() as cur: | ||||
|             cur.execute( | ||||
|                 """ | ||||
|                 INSERT INTO model_measurements ( | ||||
|                     benchmark_id, | ||||
|                     measurements | ||||
|                 ) VALUES (%s, %s) | ||||
|                 """, | ||||
|                 ( | ||||
|                     benchmark_id, | ||||
|                     measurements, | ||||
|                 ), | ||||
|             ) | ||||
|  | ||||
|             # Flatten column names | ||||
|             device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns] | ||||
|             device_agg = device_agg.reset_index() | ||||
|  | ||||
|             # Rename count column to be more descriptive | ||||
|             if "cpu_util_count" in device_agg.columns: | ||||
|                 device_agg = device_agg.rename(columns={"cpu_util_count": "device_measurement_count"}) | ||||
|  | ||||
|             # Merge with summary | ||||
|             summary_df = summary_df.merge(device_agg, on="benchmark_id", how="left") | ||||
|  | ||||
|         # Export the comprehensive summary | ||||
|         summary_df.to_csv(summary_file, index=False) | ||||
|         self.logger.info(f"Created comprehensive benchmark summary with {len(summary_df)} records at {summary_file}") | ||||
|         self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}") | ||||
|  | ||||
|     def close(self): | ||||
|         if self.use_database and self.conn: | ||||
|             self.conn.close() | ||||
|         self.conn.close() | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
| @ -317,7 +85,7 @@ handler.setFormatter(formatter) | ||||
| logger.addHandler(handler) | ||||
|  | ||||
|  | ||||
| def parse_arguments() -> tuple[str, str, str, str, bool, str]: | ||||
| def parse_arguments() -> tuple[str, str, str, str]: | ||||
|     """ | ||||
|     Parse command line arguments for the benchmarking CLI. | ||||
|     """ | ||||
| @ -347,21 +115,9 @@ def parse_arguments() -> tuple[str, str, str, str, bool, str]: | ||||
|         help="The commit message associated with the commit, truncated to 70 characters.", | ||||
|     ) | ||||
|  | ||||
|     parser.add_argument("--csv", action="store_true", default=False, help="Enable CSV output files generation.") | ||||
|  | ||||
|     parser.add_argument( | ||||
|         "--csv-output-dir", | ||||
|         type=str, | ||||
|         default="benchmark_results", | ||||
|         help="Directory for CSV output files (default: benchmark_results).", | ||||
|     ) | ||||
|  | ||||
|     args = parser.parse_args() | ||||
|  | ||||
|     # CSV is disabled by default, only enabled when --csv is used | ||||
|     generate_csv = args.csv | ||||
|  | ||||
|     return args.repository, args.branch, args.commit_id, args.commit_msg, generate_csv, args.csv_output_dir | ||||
|     return args.repository, args.branch, args.commit_id, args.commit_msg | ||||
|  | ||||
|  | ||||
| def import_from_path(module_name, file_path): | ||||
| @ -375,128 +131,22 @@ def import_from_path(module_name, file_path): | ||||
|         raise ImportModuleException(f"failed to load python module: {e}") | ||||
|  | ||||
|  | ||||
| def create_database_connection(): | ||||
|     """ | ||||
|     Try to create a database connection. Returns None if connection fails. | ||||
|     """ | ||||
|     if not PSYCOPG2_AVAILABLE: | ||||
|         logger.warning("psycopg2 not available - running in CSV-only mode") | ||||
|         return None | ||||
|  | ||||
|     try: | ||||
|         import psycopg2 | ||||
|  | ||||
|         conn = psycopg2.connect("dbname=metrics") | ||||
|         logger.info("Successfully connected to database") | ||||
|         return conn | ||||
|     except Exception as e: | ||||
|         logger.warning(f"Failed to connect to database: {e}. Running in CSV-only mode") | ||||
|         return None | ||||
|  | ||||
|  | ||||
| def create_global_metrics_recorder( | ||||
|     repository: str, branch: str, commit_id: str, commit_msg: str, generate_csv: bool = False | ||||
| ) -> MetricsRecorder: | ||||
|     """ | ||||
|     Create a global metrics recorder that will be used across all benchmarks. | ||||
|     """ | ||||
|     connection = create_database_connection() | ||||
|     recorder = MetricsRecorder(connection, logger, repository, branch, commit_id, commit_msg, generate_csv) | ||||
|  | ||||
|     # Log the storage mode | ||||
|     storage_modes = [] | ||||
|     if connection is not None: | ||||
|         storage_modes.append("database") | ||||
|     if generate_csv: | ||||
|         storage_modes.append("CSV") | ||||
|  | ||||
|     if not storage_modes: | ||||
|         logger.warning("Running benchmarks with NO data storage (no database connection, CSV disabled)") | ||||
|         logger.warning("Use --csv flag to enable CSV output when database is unavailable") | ||||
|     else: | ||||
|         logger.info(f"Running benchmarks with: {' + '.join(storage_modes)} storage") | ||||
|  | ||||
|     return recorder | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__)) | ||||
|     benches_folder_path = os.path.join(benchmarks_folder_path, "benches") | ||||
|  | ||||
|     repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir = parse_arguments() | ||||
|     repository, branch, commit_id, commit_msg = parse_arguments() | ||||
|  | ||||
|     # Create a global metrics recorder | ||||
|     global_metrics_recorder = create_global_metrics_recorder(repository, branch, commit_id, commit_msg, generate_csv) | ||||
|  | ||||
|     successful_benchmarks = 0 | ||||
|     failed_benchmarks = 0 | ||||
|  | ||||
|     # Automatically discover all benchmark modules in benches/ folder | ||||
|     benchmark_modules = [] | ||||
|  | ||||
|     if os.path.exists(benches_folder_path): | ||||
|         logger.debug(f"Scanning for benchmarks in: {benches_folder_path}") | ||||
|         for entry in os.scandir(benches_folder_path): | ||||
|     for entry in os.scandir(benchmarks_folder_path): | ||||
|         try: | ||||
|             if not entry.name.endswith(".py"): | ||||
|                 continue | ||||
|             if entry.name.startswith("__"):  # Skip __init__.py, __pycache__, etc. | ||||
|             if entry.path == __file__: | ||||
|                 continue | ||||
|  | ||||
|             # Check if the file has a run_benchmark function | ||||
|             try: | ||||
|                 logger.debug(f"checking if benches/{entry.name} has run_benchmark function") | ||||
|                 module = import_from_path(entry.name.split(".")[0], entry.path) | ||||
|                 if hasattr(module, "run_benchmark"): | ||||
|                     benchmark_modules.append(entry.name) | ||||
|                     logger.debug(f"discovered benchmark: {entry.name}") | ||||
|                 else: | ||||
|                     logger.debug(f"skipping {entry.name} - no run_benchmark function found") | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"failed to check benches/{entry.name}: {e}") | ||||
|     else: | ||||
|         logger.warning(f"Benches directory not found: {benches_folder_path}") | ||||
|  | ||||
|     if benchmark_modules: | ||||
|         logger.info(f"Discovered {len(benchmark_modules)} benchmark(s): {benchmark_modules}") | ||||
|     else: | ||||
|         logger.warning("No benchmark modules found in benches/ directory") | ||||
|  | ||||
|     for module_name in benchmark_modules: | ||||
|         module_path = os.path.join(benches_folder_path, module_name) | ||||
|         try: | ||||
|             logger.debug(f"loading: {module_name}") | ||||
|             module = import_from_path(module_name.split(".")[0], module_path) | ||||
|             logger.info(f"running benchmarks in: {module_name}") | ||||
|  | ||||
|             # Check if the module has an updated run_benchmark function that accepts metrics_recorder | ||||
|             try: | ||||
|                 # Try the new signature first | ||||
|                 module.run_benchmark(logger, repository, branch, commit_id, commit_msg, global_metrics_recorder) | ||||
|             except TypeError: | ||||
|                 # Fall back to the old signature for backward compatibility | ||||
|                 logger.warning( | ||||
|                     f"Module {module_name} using old run_benchmark signature - database connection will be created per module" | ||||
|                 ) | ||||
|                 module.run_benchmark(logger, repository, branch, commit_id, commit_msg) | ||||
|  | ||||
|             successful_benchmarks += 1 | ||||
|             logger.debug(f"loading: {entry.name}") | ||||
|             module = import_from_path(entry.name.split(".")[0], entry.path) | ||||
|             logger.info(f"running benchmarks in: {entry.name}") | ||||
|             module.run_benchmark(logger, repository, branch, commit_id, commit_msg) | ||||
|         except ImportModuleException as e: | ||||
|             logger.error(e) | ||||
|             failed_benchmarks += 1 | ||||
|         except Exception as e: | ||||
|             logger.error(f"error running benchmarks for {module_name}: {e}") | ||||
|             failed_benchmarks += 1 | ||||
|  | ||||
|     # Export CSV results at the end (if enabled) | ||||
|     try: | ||||
|         if generate_csv: | ||||
|             global_metrics_recorder.export_to_csv(csv_output_dir) | ||||
|             logger.info(f"CSV reports have been generated and saved to the {csv_output_dir} directory") | ||||
|         else: | ||||
|             logger.info("CSV generation disabled - no CSV files created (use --csv to enable)") | ||||
|  | ||||
|         logger.info(f"Benchmark run completed. Successful: {successful_benchmarks}, Failed: {failed_benchmarks}") | ||||
|     except Exception as e: | ||||
|         logger.error(f"Failed to export CSV results: {e}") | ||||
|     finally: | ||||
|         global_metrics_recorder.close() | ||||
|             logger.error(f"error running benchmarks for {entry.name}: {e}") | ||||
|  | ||||
| @ -19,7 +19,7 @@ backend: | ||||
|   model: meta-llama/Llama-2-7b-hf | ||||
|   cache_implementation: static | ||||
|   torch_compile: true | ||||
|   dtype: float16 | ||||
|   torch_dtype: float16 | ||||
|   torch_compile_config: | ||||
|     backend: inductor | ||||
|     mode: reduce-overhead | ||||
|  | ||||
							
								
								
									
										34
									
								
								benchmark/init_db.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								benchmark/init_db.sql
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,34 @@ | ||||
| -- Schema for the benchmark metrics database. | ||||
| -- Every statement uses IF NOT EXISTS, so the script can be re-run on an initialised database. | ||||
|  | ||||
| -- One row per benchmark run: where the code came from plus free-form JSON metadata. | ||||
| CREATE TABLE IF NOT EXISTS benchmarks ( | ||||
|   benchmark_id SERIAL PRIMARY KEY, | ||||
|   repository VARCHAR(255), | ||||
|   branch VARCHAR(255), | ||||
|   commit_id VARCHAR(72), | ||||
|   commit_message VARCHAR(70), | ||||
|   metadata jsonb, | ||||
|   -- Stored without a time zone; the default is the current time expressed in UTC. | ||||
|   created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC') | ||||
| ); | ||||
|  | ||||
| -- NOTE(review): benchmark_id is already the PRIMARY KEY, so this index looks redundant - confirm before removing. | ||||
| CREATE INDEX IF NOT EXISTS benchmarks_benchmark_id_idx ON benchmarks (benchmark_id); | ||||
|  | ||||
| CREATE INDEX IF NOT EXISTS benchmarks_branch_idx ON benchmarks (branch); | ||||
|  | ||||
| -- Device usage samples (CPU, memory, GPU), each linked to a benchmark run. | ||||
| CREATE TABLE IF NOT EXISTS device_measurements ( | ||||
|   measurement_id SERIAL PRIMARY KEY, | ||||
|   benchmark_id int REFERENCES benchmarks (benchmark_id), | ||||
|   cpu_util double precision, | ||||
|   mem_megabytes double precision, | ||||
|   gpu_util double precision, | ||||
|   gpu_mem_megabytes double precision, | ||||
|   time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC') | ||||
| ); | ||||
|  | ||||
| -- NOTE(review): named *_branch_idx, but the indexed column is benchmark_id. | ||||
| CREATE INDEX IF NOT EXISTS device_measurements_branch_idx ON device_measurements (benchmark_id); | ||||
|  | ||||
| -- Model-level measurements stored as a JSON document, each linked to a benchmark run. | ||||
| CREATE TABLE IF NOT EXISTS model_measurements ( | ||||
|   measurement_id SERIAL PRIMARY KEY, | ||||
|   benchmark_id int REFERENCES benchmarks (benchmark_id), | ||||
|   measurements jsonb, | ||||
|   time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC') | ||||
| ); | ||||
|  | ||||
| -- NOTE(review): named *_branch_idx, but the indexed column is benchmark_id. | ||||
| CREATE INDEX IF NOT EXISTS model_measurements_branch_idx ON model_measurements (benchmark_id); | ||||
							
								
								
									
										346
									
								
								benchmark/llama.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										346
									
								
								benchmark/llama.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,346 @@ | ||||
| from logging import Logger | ||||
| import os | ||||
| from threading import Event, Thread | ||||
| from time import perf_counter, sleep | ||||
| from typing import Optional | ||||
| from benchmarks_entrypoint import MetricsRecorder | ||||
| import gpustat | ||||
| import psutil | ||||
| import psycopg2 | ||||
| import torch | ||||
|  | ||||
| from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache | ||||
|  | ||||
|  | ||||
| # Presumably switches Hub downloads to the `hf_transfer` backend - TODO confirm against huggingface_hub docs. | ||||
| os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" | ||||
|  | ||||
| # Set at import time; `run_benchmark` overrides this to "false" before it compiles anything. | ||||
| os.environ["TOKENIZERS_PARALLELISM"] = "1" | ||||
| # Process-wide setting for float32 matmuls (see the `torch.set_float32_matmul_precision` documentation). | ||||
| torch.set_float32_matmul_precision("high") | ||||
|  | ||||
|  | ||||
| def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder): | ||||
|     p = psutil.Process(os.getpid()) | ||||
|     while not continue_metric_collection.is_set(): | ||||
|         with p.oneshot(): | ||||
|             cpu_util = p.cpu_percent() | ||||
|             mem_megabytes = p.memory_info().rss / (1024 * 1024) | ||||
|         gpu_stats = gpustat.GPUStatCollection.new_query() | ||||
|         gpu_util = gpu_stats[0]["utilization.gpu"] | ||||
|         gpu_mem_megabytes = gpu_stats[0]["memory.used"] | ||||
|         metrics_recorder.collect_device_measurements( | ||||
|             benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes | ||||
|         ) | ||||
|         sleep(0.01) | ||||
|  | ||||
|  | ||||
| def run_benchmark( | ||||
|     logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100 | ||||
| ): | ||||
|     continue_metric_collection = Event() | ||||
|     metrics_thread = None | ||||
|     model_id = "meta-llama/Llama-2-7b-hf" | ||||
|     metrics_recorder = MetricsRecorder( | ||||
|         psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg | ||||
|     ) | ||||
|     try: | ||||
|         gpu_stats = gpustat.GPUStatCollection.new_query() | ||||
|         gpu_name = gpu_stats[0]["name"] | ||||
|         benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id}) | ||||
|         logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}") | ||||
|         metrics_thread = Thread( | ||||
|             target=collect_metrics, | ||||
|             args=[benchmark_id, continue_metric_collection, metrics_recorder], | ||||
|         ) | ||||
|         metrics_thread.start() | ||||
|         logger.info("started background thread to fetch device metrics") | ||||
|  | ||||
|         os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling | ||||
|  | ||||
|         device = "cuda" | ||||
|  | ||||
|         logger.info("downloading weights") | ||||
|         # This is to avoid counting download in model load time measurement | ||||
|         model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16) | ||||
|         gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1) | ||||
|         logger.info("loading model") | ||||
|         start = perf_counter() | ||||
|         model = AutoModelForCausalLM.from_pretrained( | ||||
|             model_id, torch_dtype=torch.float16, generation_config=gen_config | ||||
|         ).eval() | ||||
|         model.to(device) | ||||
|         torch.cuda.synchronize() | ||||
|         end = perf_counter() | ||||
|         model_load_time = end - start | ||||
|         logger.info(f"loaded model in: {model_load_time}s") | ||||
|  | ||||
|         tokenizer = AutoTokenizer.from_pretrained(model_id) | ||||
|  | ||||
|         prompt = "Why dogs are so cute?" | ||||
|         inputs = tokenizer(prompt, return_tensors="pt").to(device) | ||||
|  | ||||
|         # Specify the max length (including both the prompt and the response) | ||||
|         # When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object | ||||
|         # with sequence length = `max_length`. The longer the more you will re-use it | ||||
|         seq_length = inputs["input_ids"].shape[1] | ||||
|         model.generation_config.max_length = seq_length + num_tokens_to_generate | ||||
|         batch_size = inputs["input_ids"].shape[0] | ||||
|  | ||||
|         # Copied from the gpt-fast repo | ||||
|         def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a cuda synchronization | ||||
|             q = torch.empty_like(probs_sort).exponential_(1) | ||||
|             return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int) | ||||
|  | ||||
|         def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None): | ||||
|             logits = logits / max(temperature, 1e-5) | ||||
|  | ||||
|             if top_k is not None: | ||||
|                 v, _ = torch.topk(logits, min(top_k, logits.size(-1))) | ||||
|                 pivot = v.select(-1, -1).unsqueeze(-1) | ||||
|                 logits = torch.where(logits < pivot, -float("Inf"), logits) | ||||
|             probs = torch.nn.functional.softmax(logits, dim=-1) | ||||
|             return probs | ||||
|  | ||||
|         def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None): | ||||
|             probs = logits_to_probs(logits[:, -1], temperature, top_k) | ||||
|             idx_next = multinomial_sample_one_no_sync(probs) | ||||
|             return idx_next, probs | ||||
|  | ||||
|         def decode_one_token(model, cur_token, cache_position, past_key_values): | ||||
|             logits = model( | ||||
|                 cur_token, | ||||
|                 cache_position=cache_position, | ||||
|                 past_key_values=past_key_values, | ||||
|                 return_dict=False, | ||||
|                 use_cache=True, | ||||
|             )[0] | ||||
|             new_token = sample(logits, temperature=0.6, top_k=5)[0] | ||||
|             return new_token | ||||
|  | ||||
|         ######### | ||||
|         # Eager # | ||||
|         ######### | ||||
|         with torch.no_grad(): | ||||
|             past_key_values = StaticCache( | ||||
|                 model.config, | ||||
|                 max_batch_size=batch_size, | ||||
|                 device=device, | ||||
|                 dtype=torch.float16, | ||||
|                 max_cache_len=seq_length + num_tokens_to_generate, | ||||
|             ) | ||||
|             cache_position = torch.arange(seq_length, device=device) | ||||
|             start = perf_counter() | ||||
|             model( | ||||
|                 **inputs, | ||||
|                 cache_position=cache_position, | ||||
|                 past_key_values=past_key_values, | ||||
|                 return_dict=False, | ||||
|                 use_cache=True, | ||||
|             ) | ||||
|             end = perf_counter() | ||||
|             first_eager_fwd_pass_time = end - start | ||||
|             logger.info(f"completed first eager fwd pass in: {first_eager_fwd_pass_time}s") | ||||
|             start = perf_counter() | ||||
|             output = model.generate(**inputs, do_sample=False) | ||||
|             end = perf_counter() | ||||
|             first_eager_generate_time = end - start | ||||
|             logger.info(f"completed first eager generation in: {first_eager_generate_time}s") | ||||
|             logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|             past_key_values = StaticCache( | ||||
|                 model.config, | ||||
|                 max_batch_size=batch_size, | ||||
|                 device=device, | ||||
|                 dtype=torch.float16, | ||||
|                 max_cache_len=seq_length + num_tokens_to_generate, | ||||
|             ) | ||||
|             cache_position = torch.arange(seq_length, device=device) | ||||
|             start = perf_counter() | ||||
|             model( | ||||
|                 **inputs, | ||||
|                 cache_position=cache_position, | ||||
|                 past_key_values=past_key_values, | ||||
|                 return_dict=False, | ||||
|                 use_cache=True, | ||||
|             ) | ||||
|             end = perf_counter() | ||||
|             second_eager_fwd_pass_time = end - start | ||||
|             logger.info(f"completed second eager fwd pass in: {second_eager_fwd_pass_time}s") | ||||
|             start = perf_counter() | ||||
|             model.generate(**inputs, do_sample=False) | ||||
|             end = perf_counter() | ||||
|             second_eager_generate_time = end - start | ||||
|             logger.info(f"completed second eager generation in: {second_eager_generate_time}s") | ||||
|             logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|             torch.compiler.reset() | ||||
|  | ||||
|             ################ | ||||
|             # Forward pass # | ||||
|             ################ | ||||
|  | ||||
|             # `torch.compile(model, ...)` is not recommended as you compile callbacks | ||||
|             # and full generate. We recommend compiling only the forward for now. | ||||
|             # "reduce-overhead" will use cudagraphs. | ||||
|             generated_ids = torch.zeros( | ||||
|                 (batch_size, num_tokens_to_generate + seq_length), dtype=torch.int, device=device | ||||
|             ) | ||||
|  | ||||
|             generated_ids[:, :seq_length] = inputs["input_ids"] | ||||
|             decode_one_token = torch.compile(decode_one_token, mode="reduce-overhead", fullgraph=True) | ||||
|             # model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True) | ||||
|             # TODO use  decode_one_token(model, input_id.clone(), cache_position) for verification | ||||
|             past_key_values = StaticCache( | ||||
|                 model.config, | ||||
|                 max_batch_size=batch_size, | ||||
|                 device=device, | ||||
|                 dtype=torch.float16, | ||||
|                 max_cache_len=seq_length + num_tokens_to_generate + 10, | ||||
|             ) | ||||
|             cache_position = torch.arange(seq_length, device=device) | ||||
|             all_generated_tokens = [] | ||||
|             ### First compile, prefill | ||||
|             start = perf_counter() | ||||
|             next_token = decode_one_token( | ||||
|                 model, inputs["input_ids"], cache_position=cache_position, past_key_values=past_key_values | ||||
|             ) | ||||
|             torch.cuda.synchronize() | ||||
|             end = perf_counter() | ||||
|             time_to_first_token = end - start | ||||
|             logger.info(f"completed first compile generation in: {time_to_first_token}s") | ||||
|             cache_position += 1 | ||||
|             all_generated_tokens += next_token.tolist() | ||||
|  | ||||
|             cache_position = torch.tensor([seq_length], device=device) | ||||
|             ### First compile, decoding | ||||
|             start = perf_counter() | ||||
|             next_token = decode_one_token( | ||||
|                 model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values | ||||
|             ) | ||||
|             torch.cuda.synchronize() | ||||
|             end = perf_counter() | ||||
|             time_to_second_token = end - start | ||||
|             logger.info(f"completed second compile generation in: {time_to_second_token}s") | ||||
|             cache_position += 1 | ||||
|             all_generated_tokens += next_token.tolist() | ||||
|  | ||||
|             ### Second compile, decoding | ||||
|             start = perf_counter() | ||||
|             next_token = decode_one_token( | ||||
|                 model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values | ||||
|             ) | ||||
|             torch.cuda.synchronize() | ||||
|             end = perf_counter() | ||||
|             time_to_third_token = end - start | ||||
|             logger.info(f"completed third compile forward in: {time_to_third_token}s") | ||||
|             cache_position += 1 | ||||
|             all_generated_tokens += next_token.tolist() | ||||
|  | ||||
|             ### Using cuda graphs decoding | ||||
|  | ||||
|             start = perf_counter() | ||||
|             for _ in range(1, num_tokens_to_generate): | ||||
|                 all_generated_tokens += next_token.tolist() | ||||
|                 next_token = decode_one_token( | ||||
|                     model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values | ||||
|                 ) | ||||
|                 cache_position += 1 | ||||
|             torch.cuda.synchronize() | ||||
|             end = perf_counter() | ||||
|             mean_time_to_next_token = (end - start) / num_tokens_to_generate | ||||
|             logger.info(f"completed next compile generation in: {mean_time_to_next_token}s") | ||||
|             logger.info(f"generated: {tokenizer.batch_decode(all_generated_tokens)}") | ||||
|  | ||||
|             #################### | ||||
|             # Generate compile # | ||||
|             #################### | ||||
|             torch.compiler.reset() | ||||
|             # we will not compile full generate as it' s to intensive, tho we measure full forward! | ||||
|  | ||||
|             past_key_values = StaticCache( | ||||
|                 model.config, | ||||
|                 max_batch_size=batch_size, | ||||
|                 device=device, | ||||
|                 dtype=torch.float16, | ||||
|                 max_cache_len=seq_length + 128, | ||||
|             ) | ||||
|  | ||||
|             # 1st call | ||||
|             start = perf_counter() | ||||
|             output = model.generate(**inputs, past_key_values=past_key_values) | ||||
|             torch.cuda.synchronize() | ||||
|             end = perf_counter() | ||||
|             first_compile_generate_time = end - start | ||||
|             logger.info(f"completed first compile generation in: {first_compile_generate_time}s") | ||||
|             logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|             past_key_values = StaticCache( | ||||
|                 model.config, | ||||
|                 max_batch_size=batch_size, | ||||
|                 device=device, | ||||
|                 dtype=torch.float16, | ||||
|                 max_cache_len=seq_length + 128, | ||||
|             ) | ||||
|             # 2nd call | ||||
|             start = perf_counter() | ||||
|             output = model.generate(**inputs, past_key_values=past_key_values) | ||||
|             torch.cuda.synchronize() | ||||
|             end = perf_counter() | ||||
|             second_compile_generate_time = end - start | ||||
|             logger.info(f"completed second compile generation in: {second_compile_generate_time}s") | ||||
|             logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|             past_key_values = StaticCache( | ||||
|                 model.config, | ||||
|                 max_batch_size=batch_size, | ||||
|                 device=device, | ||||
|                 dtype=torch.float16, | ||||
|                 max_cache_len=seq_length + 128, | ||||
|             ) | ||||
|  | ||||
|             # 3rd call | ||||
|             start = perf_counter() | ||||
|             output = model.generate(**inputs, past_key_values=past_key_values) | ||||
|             end = perf_counter() | ||||
|             third_compile_generate_time = end - start | ||||
|             logger.info(f"completed third compile generation in: {third_compile_generate_time}s") | ||||
|             logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|             past_key_values = StaticCache( | ||||
|                 model.config, | ||||
|                 max_batch_size=batch_size, | ||||
|                 device=device, | ||||
|                 dtype=torch.float16, | ||||
|                 max_cache_len=seq_length + 128, | ||||
|             ) | ||||
|             # 4th call | ||||
|             start = perf_counter() | ||||
|             output = model.generate(**inputs, past_key_values=past_key_values) | ||||
|             end = perf_counter() | ||||
|             fourth_compile_generate_time = end - start | ||||
|             logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s") | ||||
|             logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") | ||||
|  | ||||
|         metrics_recorder.collect_model_measurements( | ||||
|             benchmark_id, | ||||
|             { | ||||
|                 "model_load_time": model_load_time, | ||||
|                 "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time, | ||||
|                 "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time, | ||||
|                 "first_eager_generate_time_secs": first_eager_generate_time, | ||||
|                 "second_eager_generate_time_secs": second_eager_generate_time, | ||||
|                 "time_to_first_token_secs": time_to_first_token, | ||||
|                 "time_to_second_token_secs": time_to_second_token, | ||||
|                 "time_to_third_token_secs": time_to_third_token, | ||||
|                 "time_to_next_token_mean_secs": mean_time_to_next_token, | ||||
|                 "first_compile_generate_time_secs": first_compile_generate_time, | ||||
|                 "second_compile_generate_time_secs": second_compile_generate_time, | ||||
|                 "third_compile_generate_time_secs": third_compile_generate_time, | ||||
|                 "fourth_compile_generate_time_secs": fourth_compile_generate_time, | ||||
|             }, | ||||
|         ) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Caught exception: {e}") | ||||
|     continue_metric_collection.set() | ||||
|     if metrics_thread is not None: | ||||
|         metrics_thread.join() | ||||
|     metrics_recorder.close() | ||||
| @ -3,11 +3,7 @@ import subprocess | ||||
|  | ||||
|  | ||||
| def main(config_dir, config_name, args): | ||||
|     subprocess.run( | ||||
|         ["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"] | ||||
|         + ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"] | ||||
|         + args | ||||
|     ) | ||||
|     subprocess.run(["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"] + ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"] + args) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|  | ||||
| @ -2,5 +2,4 @@ gpustat==1.1.1 | ||||
| psutil==6.0.0 | ||||
| psycopg2==2.9.9 | ||||
| torch>=2.4.0 | ||||
| hf_transfer | ||||
| pandas>=1.5.0 | ||||
| hf_transfer | ||||
							
								
								
									
										1
									
								
								benchmark_v2/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								benchmark_v2/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1 +0,0 @@ | ||||
| benchmark_results/ | ||||
| @ -1,138 +0,0 @@ | ||||
| # Benchmarking v2 | ||||
|  | ||||
| A comprehensive benchmarking framework for transformer models that supports multiple execution modes (eager, compiled, kernelized), detailed performance metrics collection, and structured output format. | ||||
|  | ||||
|  | ||||
| ## Quick Start | ||||
|  | ||||
| ### Running All Benchmarks | ||||
|  | ||||
| ```bash | ||||
| # Run all benchmarks with default settings | ||||
| python run_benchmarks.py | ||||
|  | ||||
| # Specify output directory | ||||
| python run_benchmarks.py --output-dir my_results | ||||
|  | ||||
| # Run with custom parameters | ||||
| python run_benchmarks.py \ | ||||
|     --warmup-iterations 5 \ | ||||
|     --measurement-iterations 10 \ | ||||
|     --num-tokens-to-generate 200 | ||||
| ``` | ||||
|  | ||||
| ### Uploading Results to HuggingFace Dataset | ||||
|  | ||||
| You can automatically upload benchmark results to a HuggingFace Dataset for tracking and analysis: | ||||
|  | ||||
| ```bash | ||||
| # Upload to a public dataset with auto-generated run ID | ||||
| python run_benchmarks.py --upload-to-hub username/benchmark-results | ||||
|  | ||||
| # Upload with a custom run ID for easy identification | ||||
| python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1 | ||||
|  | ||||
| # Upload with custom HuggingFace token (if not set in environment) | ||||
| python run_benchmarks.py --upload-to-hub username/benchmark-results --token hf_your_token_here | ||||
| ``` | ||||
|  | ||||
| **Dataset Directory Structure:** | ||||
| ``` | ||||
| dataset_name/ | ||||
| ├── 2025-01-15/ | ||||
| │   ├── runs/                       # Non-scheduled runs (manual, PR, etc.) | ||||
| │   │   └── 123-1245151651/         # GitHub run number and ID | ||||
| │   │       └── benchmark_results/ | ||||
| │   │           ├── benchmark_summary_20250115_143022.json | ||||
| │   │           └── model-name/ | ||||
| │   │               └── model-name_benchmark_20250115_143022.json | ||||
| │   └── benchmark_results_abc123de/ # Scheduled runs (daily CI) | ||||
| │       ├── benchmark_summary_20250115_143022.json | ||||
| │       └── model-name/ | ||||
| │           └── model-name_benchmark_20250115_143022.json | ||||
| └── 2025-01-16/ | ||||
|     └── ... | ||||
| ``` | ||||
|  | ||||
| **Authentication for Uploads:** | ||||
|  | ||||
| For uploading results, you need a HuggingFace token with write permissions to the target dataset. You can provide the token in several ways (in order of precedence): | ||||
|  | ||||
| 1. Command line: `--token hf_your_token_here` | ||||
| 2. Environment variable: `HF_TOKEN` | ||||
|  | ||||
| ### Running Specific Benchmarks | ||||
|  | ||||
| ```bash | ||||
| # Include only specific benchmarks | ||||
| python run_benchmarks.py --include llama | ||||
|  | ||||
| # Exclude specific benchmarks | ||||
| python run_benchmarks.py --exclude old_benchmark | ||||
| ``` | ||||
|  | ||||
| ## Output Format | ||||
|  | ||||
| Results are saved as JSON files with the following structure: | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "model_name": "llama_2_7b", | ||||
|   "benchmark_scenarios": [ | ||||
|     { | ||||
|       "scenario_name": "eager_variant", | ||||
|       "metadata": { | ||||
|         "timestamp": "2025-01-XX...", | ||||
|         "commit_id": "abc123...", | ||||
|         "hardware_info": { | ||||
|           "gpu_name": "NVIDIA A100", | ||||
|           "gpu_memory_total": 40960, | ||||
|           "cpu_count": 64 | ||||
|         }, | ||||
|         "config": { | ||||
|           "variant": "eager", | ||||
|           "warmup_iterations": 3, | ||||
|           "measurement_iterations": 5 | ||||
|         } | ||||
|       }, | ||||
|       "measurements": { | ||||
|         "latency": { | ||||
|           "mean": 2.45, | ||||
|           "median": 2.43, | ||||
|           "std": 0.12, | ||||
|           "min": 2.31, | ||||
|           "max": 2.67, | ||||
|           "p95": 2.61, | ||||
|           "p99": 2.65 | ||||
|         }, | ||||
|         "time_to_first_token": { | ||||
|           "mean": 0.15, | ||||
|           "std": 0.02 | ||||
|         }, | ||||
|         "tokens_per_second": { | ||||
|           "mean": 87.3, | ||||
|           "unit": "tokens/sec" | ||||
|         } | ||||
|       }, | ||||
|       "gpu_metrics": { | ||||
|         "gpu_utilization_mean": 85.2, | ||||
|         "gpu_memory_used_mean": 12450 | ||||
|       } | ||||
|     } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### Debug Mode | ||||
|  | ||||
| ```bash | ||||
| python run_benchmarks.py --log-level DEBUG | ||||
| ``` | ||||
|  | ||||
| ## Contributing | ||||
|  | ||||
| To add new benchmarks: | ||||
|  | ||||
| 1. Create a new file in `benches/` | ||||
| 2. Implement the `ModelBenchmark` interface | ||||
| 3. Add a runner function (`run_<benchmark_name>` or `run_benchmark`) | ||||
| 4. Run the new benchmark through `run_benchmarks.py` (e.g. `python run_benchmarks.py --include <benchmark_name>`) | ||||
| @ -1 +0,0 @@ | ||||
| # Benchmark implementations directory | ||||
| @ -1,165 +0,0 @@ | ||||
| # Copyright 2025 The HuggingFace Team. All rights reserved. | ||||
| # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | ||||
| # You may obtain a copy of the License at | ||||
| # | ||||
| #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
|  | ||||
| import logging | ||||
| import os | ||||
| from typing import Any | ||||
|  | ||||
| import torch | ||||
| from benchmark_framework import ModelBenchmark | ||||
|  | ||||
|  | ||||
| # Both settings take effect as soon as this module is imported. | ||||
| os.environ["TOKENIZERS_PARALLELISM"] = "1" | ||||
| # Process-wide setting for float32 matmuls (see the `torch.set_float32_matmul_precision` documentation). | ||||
| torch.set_float32_matmul_precision("high") | ||||
|  | ||||
|  | ||||
| class LLaMABenchmark(ModelBenchmark): | ||||
|     """Simplified LLaMA model benchmark implementation using the ModelBenchmark base class.""" | ||||
|  | ||||
|     def __init__(self, logger: logging.Logger): | ||||
|         super().__init__(logger) | ||||
|         self._default_prompt = "Why dogs are so cute?"  # Custom prompt for LLaMA | ||||
|  | ||||
|     def get_scenario_configs(self) -> list[dict[str, Any]]: | ||||
|         """ | ||||
|         Get LLaMA-specific scenario configurations. | ||||
|  | ||||
|         Returns: | ||||
|             List of scenario configuration dictionaries | ||||
|         """ | ||||
|         return [ | ||||
|             # Eager variants | ||||
|             {"variant": "eager", "compile_mode": None, "use_cache": True, "description": "Eager execution with cache"}, | ||||
|             # Compiled variants | ||||
|             { | ||||
|                 "variant": "compiled", | ||||
|                 "compile_mode": "max-autotune", | ||||
|                 "use_cache": True, | ||||
|                 "description": "Compiled with max autotune", | ||||
|             }, | ||||
|             # Kernelized variant (if available) | ||||
|             { | ||||
|                 "variant": "kernelized", | ||||
|                 "compile_mode": "max-autotune", | ||||
|                 "use_cache": True, | ||||
|                 "description": "Kernelized execution", | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|     def _is_kernelization_available(self) -> bool: | ||||
|         """Check if kernelization is available for LLaMA.""" | ||||
|         try: | ||||
|             from kernels import Mode, kernelize  # noqa: F401 | ||||
|  | ||||
|             return True | ||||
|         except ImportError: | ||||
|             self.logger.debug("Kernelization not available: kernels module not found") | ||||
|             return False | ||||
|  | ||||
|     def get_default_generation_config(self) -> dict[str, Any]: | ||||
|         """Get LLaMA-specific generation configuration.""" | ||||
|         return { | ||||
|             "do_sample": False, | ||||
|             "top_p": 1.0, | ||||
|             "temperature": 1.0, | ||||
|             "repetition_penalty": 1.0, | ||||
|             "max_new_tokens": None,  # Will be set per scenario | ||||
|         } | ||||
|  | ||||
|     def get_model_init_kwargs(self, config) -> dict[str, Any]: | ||||
|         """Get LLaMA-specific model initialization kwargs.""" | ||||
|         return { | ||||
|             "torch_dtype": getattr(torch, config.torch_dtype), | ||||
|             "attn_implementation": config.attn_implementation, | ||||
|             "use_cache": True, | ||||
|         } | ||||
|  | ||||
|     def get_default_torch_dtype(self) -> str: | ||||
|         """Get default torch dtype for LLaMA.""" | ||||
|         return "float16"  # LLaMA works well with float16 | ||||
|  | ||||
|     def get_default_device(self) -> str: | ||||
|         """Get default device for LLaMA.""" | ||||
|         return "cuda"  # LLaMA prefers CUDA | ||||
|  | ||||
|  | ||||
def run_llama(logger, output_dir, **kwargs):
    """
    Execute the LLaMA benchmark suite and persist its results.

    Args:
        logger: Logger used for progress and error reporting.
        output_dir: Directory handed to the benchmark runner for its outputs.
        **kwargs: Optional overrides. Recognised keys are ``model_id``,
            ``warmup_iterations``, ``measurement_iterations``,
            ``num_tokens_to_generate``, ``include_sdpa_variants``, ``device``,
            ``torch_dtype``, ``batch_size`` and ``commit_id``; anything else is ignored.

    Returns:
        Whatever ``runner.save_results`` returns (the results file), or ``None``
        when the runner produced no results.

    Raises:
        Exception: Any failure is logged and then re-raised unchanged.
    """
    import traceback

    from benchmark_framework import BenchmarkRunner

    # Everything forwarded to `create_scenarios`, in the order it is passed.
    scenario_options = {
        "model_id": kwargs.get("model_id", "meta-llama/Llama-2-7b-hf"),
        "warmup_iterations": kwargs.get("warmup_iterations", 3),
        "measurement_iterations": kwargs.get("measurement_iterations", 5),
        "num_tokens_to_generate": kwargs.get("num_tokens_to_generate", 100),
        "include_sdpa_variants": kwargs.get("include_sdpa_variants", True),
        "device": kwargs.get("device", "cuda"),
        "torch_dtype": kwargs.get("torch_dtype", "float16"),
        "batch_size": kwargs.get("batch_size", 1),
    }
    # `commit_id` is metadata for the runner only, not a scenario setting.
    commit_id = kwargs.get("commit_id")

    model_id = scenario_options["model_id"]
    warmup_iterations = scenario_options["warmup_iterations"]
    measurement_iterations = scenario_options["measurement_iterations"]
    num_tokens_to_generate = scenario_options["num_tokens_to_generate"]

    logger.info(f"Starting LLaMA benchmark for model: {model_id}")
    logger.info(
        f"Configuration: warmup={warmup_iterations}, measurement={measurement_iterations}, tokens={num_tokens_to_generate}"
    )

    try:
        benchmark = LLaMABenchmark(logger)
        scenarios = benchmark.create_scenarios(**scenario_options)
        logger.info(f"Created {len(scenarios)} benchmark scenarios")

        runner = BenchmarkRunner(logger, output_dir)
        results = runner.run_benchmark(benchmark, scenarios, commit_id=commit_id)
        if not results:
            logger.warning("No successful benchmark results")
            return None

        # Results are filed under the last path component of the model ID.
        model_name = model_id.split("/")[-1]
        output_file = runner.save_results(model_name, results)

        logger.info(f"LLaMA benchmark completed successfully. Results saved to: {output_file}")
        return output_file
    except Exception as e:
        logger.error(f"LLaMA benchmark failed: {e}")
        logger.debug(traceback.format_exc())
        raise
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -1,7 +0,0 @@ | ||||
| numpy>=1.21.0 | ||||
| psutil>=5.8.0 | ||||
| gpustat>=1.0.0 | ||||
| torch>=2.0.0 | ||||
| transformers>=4.30.0 | ||||
| datasets>=2.10.0 | ||||
| huggingface_hub>=0.16.0  | ||||
| @ -1,495 +0,0 @@ | ||||
| #!/usr/bin/env python3 | ||||
| # Copyright 2025 The HuggingFace Team. All rights reserved. | ||||
| # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | ||||
| # You may obtain a copy of the License at | ||||
| # | ||||
| #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
|  | ||||
| """ | ||||
| Top-level benchmarking script that automatically discovers and runs all benchmarks | ||||
| in the ./benches directory, organizing outputs into model-specific subfolders. | ||||
| """ | ||||
|  | ||||
| import argparse | ||||
| import importlib.util | ||||
| import json | ||||
| import logging | ||||
| import os | ||||
| import sys | ||||
| import uuid | ||||
| from datetime import datetime | ||||
| from pathlib import Path | ||||
| from typing import Any, Optional | ||||
|  | ||||
|  | ||||
def setup_logging(log_level: str = "INFO", enable_file_logging: bool = False) -> logging.Logger:
    """
    Configure root logging and return this module's logger.

    Args:
        log_level: Name of a ``logging`` level (case-insensitive), e.g. ``"INFO"``.
        enable_file_logging: When true, also log to a timestamped
            ``benchmark_run_*.log`` file in the current working directory.

    Returns:
        The logger named after this module.

    Raises:
        ValueError: If ``log_level`` does not name an integer level on ``logging``.
    """
    level_value = getattr(logging, log_level.upper(), None)
    if not isinstance(level_value, int):
        raise ValueError(f"Invalid log level: {log_level}")

    # Console output always goes to stdout; the file handler is opt-in.
    log_handlers = [logging.StreamHandler(sys.stdout)]
    if enable_file_logging:
        file_handler = logging.FileHandler(f"benchmark_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
        log_handlers.append(file_handler)

    logging.basicConfig(
        level=level_value,
        format="[%(levelname)s - %(asctime)s] %(name)s: %(message)s",
        handlers=log_handlers,
    )

    return logging.getLogger(__name__)
|  | ||||
|  | ||||
def discover_benchmarks(benches_dir: str) -> list[dict[str, Any]]:
    """
    Discover all benchmark modules in the benches directory.

    Every ``*.py`` file whose name does not start with ``__`` is imported. A module
    is registered when it exposes ``run_<module_name>`` or, failing that,
    ``run_benchmark``. Modules that fail to import, or expose neither function,
    are logged and skipped.

    Args:
        benches_dir: Directory containing the benchmark modules.

    Returns:
        List of dictionaries with the keys ``name``, ``path``, ``module`` and
        ``runner_function``, sorted by file path so that the run order is
        reproducible across machines.

    Raises:
        FileNotFoundError: If ``benches_dir`` does not exist.
    """
    benches_path = Path(benches_dir)
    if not benches_path.exists():
        raise FileNotFoundError(f"Benches directory not found: {benches_dir}")

    benchmarks = []

    # `Path.glob` yields entries in arbitrary (filesystem) order; sort for determinism.
    for py_file in sorted(benches_path.glob("*.py")):
        if py_file.name.startswith("__"):
            continue

        module_name = py_file.stem

        try:
            spec = importlib.util.spec_from_file_location(module_name, py_file)
            if spec is None or spec.loader is None:
                logging.error(f"Failed to import {py_file}: could not create an import spec")
                continue
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
        except Exception as e:
            # A broken benchmark module must not prevent the others from running.
            logging.error(f"Failed to import {py_file}: {e}")
            continue

        # Prefer the module-specific entry point, then the generic one.
        for runner_name in (f"run_{module_name}", "run_benchmark"):
            if hasattr(module, runner_name):
                benchmarks.append(
                    {
                        "name": module_name,
                        "path": str(py_file),
                        "module": module,
                        "runner_function": getattr(module, runner_name),
                    }
                )
                break
        else:
            logging.warning(f"No runner function found in {py_file}")

    return benchmarks
|  | ||||
|  | ||||
def run_single_benchmark(
    benchmark_info: dict[str, Any], output_dir: str, logger: logging.Logger, **kwargs
) -> Optional[str]:
    """
    Run a single benchmark and return the output file path.

    The runner is called with keyword arguments only. If it declares ``**kwargs``
    it receives ``logger``, ``output_dir`` and every extra option; otherwise it
    receives only those of these that match one of its parameter names.

    Args:
        benchmark_info: Dictionary containing benchmark module info; the keys
            ``name`` and ``runner_function`` are read.
        output_dir: Base output directory
        logger: Logger instance
        **kwargs: Additional arguments to pass to the benchmark

    Returns:
        The path returned by the runner (``str`` or path-like, converted with
        ``os.fspath``), ``"completed"`` when the runner returned anything else,
        and ``None`` when the runner (or its invocation) raised.
    """
    benchmark_name = benchmark_info["name"]
    runner_func = benchmark_info["runner_function"]

    logger.info(f"Running benchmark: {benchmark_name}")

    try:
        import inspect

        sig = inspect.signature(runner_func)

        # Everything we are able to offer to the runner.
        available_kwargs = {"logger": logger, "output_dir": output_dir, **kwargs}

        accepts_var_kwargs = any(
            param.kind is inspect.Parameter.VAR_KEYWORD for param in sig.parameters.values()
        )
        if accepts_var_kwargs:
            call_kwargs = available_kwargs
        else:
            # Without **kwargs, only pass what the signature explicitly names.
            call_kwargs = {k: v for k, v in available_kwargs.items() if k in sig.parameters}

        result = runner_func(**call_kwargs)

        if isinstance(result, (str, os.PathLike)):
            # The runner reported where it wrote its results; path objects such as
            # `pathlib.Path` are normalised so the location is not lost.
            return os.fspath(result)

        logger.info(f"Benchmark {benchmark_name} completed successfully")
        return "completed"

    except Exception as e:
        # Best effort: a failing benchmark is reported but must not abort the whole run.
        logger.error(f"Benchmark {benchmark_name} failed: {e}")
        import traceback

        logger.debug(traceback.format_exc())
        return None
|  | ||||
|  | ||||
def generate_summary_report(
    output_dir: str,
    benchmark_results: dict[str, Any],
    logger: logging.Logger,
    benchmark_run_uuid: Optional[str] = None,
) -> str:
    """
    Generate a summary report of all benchmark runs.

    Args:
        output_dir: Directory the report is written to (created if missing).
        benchmark_results: Mapping of benchmark name to its result; a ``None``
            value marks a failed benchmark.
        logger: Logger instance
        benchmark_run_uuid: Identifier of this run, stored in the report metadata.

    Returns:
        Path to the written ``benchmark_summary_<timestamp>.json`` file.
    """
    from datetime import timezone

    os.makedirs(output_dir, exist_ok=True)

    # The file name uses local time; the metadata timestamp below is UTC.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.json")

    total = len(benchmark_results)
    failed = sum(1 for result in benchmark_results.values() if result is None)

    summary_data = {
        "run_metadata": {
            # `datetime.utcnow()` is deprecated; this yields the same naive-UTC ISO string.
            "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            "benchmark_run_uuid": benchmark_run_uuid,
            "total_benchmarks": total,
            "successful_benchmarks": total - failed,
            "failed_benchmarks": failed,
        },
        "benchmark_results": benchmark_results,
        "output_directory": output_dir,
    }

    with open(summary_file, "w", encoding="utf-8") as f:
        # `default=str` stringifies any result value that is not JSON-serialisable.
        json.dump(summary_data, f, indent=2, default=str)

    logger.info(f"Summary report saved to: {summary_file}")
    return summary_file
|  | ||||
|  | ||||
def upload_results_to_hf_dataset(
    output_dir: str,
    summary_file: str,
    dataset_name: str,
    run_id: Optional[str] = None,
    token: Optional[str] = None,
    logger: Optional[logging.Logger] = None,
) -> Optional[str]:
    """
    Upload benchmark results to a HuggingFace Dataset.
    Based on upload_collated_report() from utils/collated_reports.py

    Every file below ``output_dir`` is uploaded, keeping its relative path, to
    ``<YYYY-MM-DD>/runs/<run_id>/benchmark_results`` in the dataset repository. When
    the ``GITHUB_EVENT_NAME`` environment variable is ``schedule`` the ``runs/``
    segment is omitted.

    Args:
        output_dir: Local output directory containing results
        summary_file: Path to the summary file (currently unused; it is uploaded
            only if it lives inside ``output_dir``)
        dataset_name: Name of the HuggingFace dataset to upload to
        run_id: Unique run identifier. If None, it is built from the
            ``GITHUB_RUN_NUMBER`` and ``GITHUB_RUN_ID`` environment variables, or
            generated randomly when those are not set.
        token: HuggingFace token for authentication; passed through to ``HfApi.upload_file``
        logger: Logger instance (defaults to this module's logger)
    Returns:
        The run_id used for the upload, None if upload failed
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    from huggingface_hub import HfApi

    api = HfApi()

    if run_id is None:
        github_run_number = os.getenv("GITHUB_RUN_NUMBER")
        github_run_id = os.getenv("GITHUB_RUN_ID")
        if github_run_number and github_run_id:
            run_id = f"{github_run_number}-{github_run_id}"
        else:
            # Outside GitHub Actions there is nothing to derive the ID from; generate
            # one so results never end up under a literal "None" folder.
            run_id = str(uuid.uuid4())[:8]

    date_folder = datetime.now().strftime("%Y-%m-%d")

    github_event_name = os.getenv("GITHUB_EVENT_NAME")
    if github_event_name != "schedule":
        # Non-scheduled runs go under a runs subfolder
        repo_path = f"{date_folder}/runs/{run_id}/benchmark_results"
    else:
        # Scheduled runs go directly under the date
        repo_path = f"{date_folder}/{run_id}/benchmark_results"

    logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")

    try:
        output_path = Path(output_dir)

        # Upload all files in the output directory, one commit per file.
        for file_path in output_path.rglob("*"):
            if not file_path.is_file():
                continue

            # Repository paths always use forward slashes, whatever the local OS uses.
            relative_path = file_path.relative_to(output_path).as_posix()
            path_in_repo = f"{repo_path}/{relative_path}"

            logger.debug(f"Uploading {file_path} to {path_in_repo}")

            api.upload_file(
                path_or_fileobj=str(file_path),
                path_in_repo=path_in_repo,
                repo_id=dataset_name,
                repo_type="dataset",
                token=token,
                commit_message=f"Upload benchmark results for run {run_id}",
            )

        logger.info(
            f"Successfully uploaded results to: https://huggingface.co/datasets/{dataset_name}/tree/main/{repo_path}"
        )

        return run_id

    except Exception as upload_error:
        # Uploading is best effort: report the failure and let the caller carry on.
        logger.error(f"Failed to upload results: {upload_error}")
        import traceback

        logger.debug(traceback.format_exc())
        return None
|  | ||||
|  | ||||
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the benchmark runner."""
    parser = argparse.ArgumentParser(
        description="Run all benchmarks in the ./benches directory",
        epilog="""
Examples:
  # Run all available benchmarks
  python3 run_benchmarks.py
  
  # Run with specific model and upload to HuggingFace Dataset
  python3 run_benchmarks.py --model-id meta-llama/Llama-2-7b-hf --push-to-hub username/benchmark-results
  
  # Run with custom run ID and upload to HuggingFace Dataset
  python3 run_benchmarks.py --run-id experiment_v1 --push-to-hub org/benchmarks
  
  # Run only specific benchmarks with file logging
  python3 run_benchmarks.py --include llama --enable-file-logging
        """,  # noqa: W293
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default="benchmark_results",
        help="Base output directory for benchmark results (default: benchmark_results)",
    )

    parser.add_argument(
        "--benches-dir",
        type=str,
        default="./benches",
        help="Directory containing benchmark implementations (default: ./benches)",
    )

    parser.add_argument(
        "--log-level",
        type=str,
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help="Logging level (default: INFO)",
    )

    parser.add_argument("--model-id", type=str, help="Specific model ID to benchmark (if supported by benchmarks)")

    parser.add_argument("--warmup-iterations", type=int, default=3, help="Number of warmup iterations (default: 3)")

    parser.add_argument(
        "--measurement-iterations", type=int, default=5, help="Number of measurement iterations (default: 5)"
    )

    parser.add_argument(
        "--num-tokens-to-generate",
        type=int,
        default=100,
        help="Number of tokens to generate in benchmarks (default: 100)",
    )

    parser.add_argument("--include", type=str, nargs="*", help="Only run benchmarks matching these names")

    parser.add_argument("--exclude", type=str, nargs="*", help="Exclude benchmarks matching these names")

    parser.add_argument("--enable-file-logging", action="store_true", help="Enable file logging (disabled by default)")

    parser.add_argument(
        "--commit-id", type=str, help="Git commit ID for metadata (if not provided, will auto-detect from git)"
    )

    parser.add_argument(
        "--push-to-hub",
        type=str,
        help="Upload results to HuggingFace Dataset (provide dataset name, e.g., 'username/benchmark-results')",
    )

    parser.add_argument(
        "--run-id", type=str, help="Custom run ID for organizing results (if not provided, will generate a unique ID)"
    )

    parser.add_argument(
        "--token",
        type=str,
        help="HuggingFace token for dataset uploads (if not provided, will use HF_TOKEN environment variable)",
    )

    return parser


def main():
    """
    Main entry point for the benchmarking script.

    Discovers the benchmarks, applies the ``--include``/``--exclude`` substring
    filters, runs what remains, writes a summary report and optionally uploads the
    output directory to a HuggingFace Dataset.

    Returns:
        Process exit status: ``0`` when every selected benchmark succeeded, ``1`` when
        none were found or selected, when at least one failed, or on an unexpected error.
    """
    # Generate a unique UUID for this benchmark run
    benchmark_run_uuid = str(uuid.uuid4())[:8]

    args = _build_arg_parser().parse_args()

    # Setup logging
    logger = setup_logging(args.log_level, args.enable_file_logging)

    logger.info("Starting benchmark discovery and execution")
    logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
    logger.info(f"Output directory: {args.output_dir}")
    logger.info(f"Benches directory: {args.benches_dir}")

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    try:
        # Discover benchmarks
        benchmarks = discover_benchmarks(args.benches_dir)
        logger.info(f"Discovered {len(benchmarks)} benchmark(s): {[b['name'] for b in benchmarks]}")

        if not benchmarks:
            logger.warning("No benchmarks found!")
            return 1

        # Filter benchmarks based on include/exclude (plain substring matching on the name)
        filtered_benchmarks = benchmarks

        if args.include:
            filtered_benchmarks = [
                b for b in filtered_benchmarks if any(pattern in b["name"] for pattern in args.include)
            ]
            logger.info(f"Filtered to include: {[b['name'] for b in filtered_benchmarks]}")

        if args.exclude:
            filtered_benchmarks = [
                b for b in filtered_benchmarks if not any(pattern in b["name"] for pattern in args.exclude)
            ]
            logger.info(f"After exclusion: {[b['name'] for b in filtered_benchmarks]}")

        if not filtered_benchmarks:
            logger.warning("No benchmarks remaining after filtering!")
            return 1

        # Prepare common kwargs for benchmarks
        benchmark_kwargs = {
            "warmup_iterations": args.warmup_iterations,
            "measurement_iterations": args.measurement_iterations,
            "num_tokens_to_generate": args.num_tokens_to_generate,
        }

        # Optional settings are only forwarded when given, so benchmarks keep their own defaults.
        if args.model_id:
            benchmark_kwargs["model_id"] = args.model_id

        if args.commit_id:
            benchmark_kwargs["commit_id"] = args.commit_id

        # Run benchmarks
        benchmark_results = {}
        successful_count = 0

        for benchmark_info in filtered_benchmarks:
            result = run_single_benchmark(benchmark_info, args.output_dir, logger, **benchmark_kwargs)

            benchmark_results[benchmark_info["name"]] = result

            if result is not None:
                successful_count += 1

        # Generate summary report
        summary_file = generate_summary_report(args.output_dir, benchmark_results, logger, benchmark_run_uuid)

        # Upload results to HuggingFace Dataset if requested
        upload_run_id = None
        if args.push_to_hub:
            logger.info("=" * 60)
            logger.info("UPLOADING TO HUGGINGFACE DATASET")
            logger.info("=" * 60)
            # Use provided run_id or fallback to benchmark run UUID
            effective_run_id = args.run_id or benchmark_run_uuid
            upload_run_id = upload_results_to_hf_dataset(
                output_dir=args.output_dir,
                summary_file=summary_file,
                dataset_name=args.push_to_hub,
                run_id=effective_run_id,
                token=args.token,
                logger=logger,
            )
            if upload_run_id:
                logger.info(f"Upload completed with run ID: {upload_run_id}")
            else:
                logger.warning("Upload failed - continuing with local results")

        # Final summary
        total_benchmarks = len(filtered_benchmarks)
        failed_count = total_benchmarks - successful_count

        logger.info("=" * 60)
        logger.info("BENCHMARK RUN SUMMARY")
        logger.info("=" * 60)
        logger.info(f"Total benchmarks: {total_benchmarks}")
        logger.info(f"Successful: {successful_count}")
        logger.info(f"Failed: {failed_count}")
        logger.info(f"Output directory: {args.output_dir}")
        logger.info(f"Summary report: {summary_file}")

        if args.push_to_hub:
            if upload_run_id:
                # Mirror the upload layout: scheduled runs are stored directly under the
                # date folder, every other run under an extra "runs/" folder.
                runs_prefix = "" if os.getenv("GITHUB_EVENT_NAME") == "schedule" else "runs/"
                date_folder = datetime.now().strftime("%Y-%m-%d")
                logger.info(f"HuggingFace Dataset: {args.push_to_hub}")
                logger.info(f"Run ID: {upload_run_id}")
                logger.info(
                    f"View results: https://huggingface.co/datasets/{args.push_to_hub}/tree/main/{date_folder}/{runs_prefix}{upload_run_id}"
                )
            else:
                logger.warning("Upload to HuggingFace Dataset failed")

        if failed_count > 0:
            logger.warning(f"{failed_count} benchmark(s) failed. Check logs for details.")
            return 1
        else:
            logger.info("All benchmarks completed successfully!")
            return 0

    except Exception as e:
        # Top-level boundary: report the failure and turn it into a non-zero exit status.
        logger.error(f"Benchmark run failed: {e}")
        import traceback

        logger.debug(traceback.format_exc())
        return 1


if __name__ == "__main__":
    sys.exit(main())
							
								
								
									
										36
									
								
								conftest.py
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								conftest.py
									
									
									
									
									
								
							| @ -16,7 +16,6 @@ | ||||
| # by pytest before any tests are run | ||||
|  | ||||
| import doctest | ||||
| import os | ||||
| import sys | ||||
| import warnings | ||||
| from os.path import abspath, dirname, join | ||||
| @ -24,18 +23,12 @@ from os.path import abspath, dirname, join | ||||
| import _pytest | ||||
| import pytest | ||||
|  | ||||
| from transformers.testing_utils import ( | ||||
|     HfDoctestModule, | ||||
|     HfDocTestParser, | ||||
|     is_torch_available, | ||||
|     patch_testing_methods_to_collect_info, | ||||
|     patch_torch_compile_force_graph, | ||||
| ) | ||||
| from transformers.testing_utils import HfDoctestModule, HfDocTestParser | ||||
|  | ||||
|  | ||||
| NOT_DEVICE_TESTS = { | ||||
|     "test_tokenization", | ||||
|     "test_tokenization_mistral_common", | ||||
|     "test_processor", | ||||
|     "test_processing", | ||||
|     "test_beam_constraints", | ||||
|     "test_configuration_utils", | ||||
| @ -54,6 +47,7 @@ NOT_DEVICE_TESTS = { | ||||
|     "test_gradient_checkpointing_backward_compatibility", | ||||
|     "test_gradient_checkpointing_enable_disable", | ||||
|     "test_torch_save_load", | ||||
|     "test_initialization", | ||||
|     "test_forward_signature", | ||||
|     "test_model_get_set_embeddings", | ||||
|     "test_model_main_input_name", | ||||
| @ -63,8 +57,11 @@ NOT_DEVICE_TESTS = { | ||||
|     "test_load_save_without_tied_weights", | ||||
|     "test_tied_weights_keys", | ||||
|     "test_model_weights_reload_no_missing_tied_weights", | ||||
|     "test_can_load_ignoring_mismatched_shapes", | ||||
|     "test_mismatched_shapes_have_properly_initialized_weights", | ||||
|     "test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist", | ||||
|     "test_model_is_small", | ||||
|     "test_tf_from_pt_safetensors", | ||||
|     "test_flax_from_pt_safetensors", | ||||
|     "ModelTest::test_pipeline_",  # None of the pipeline tests from PipelineTesterMixin (of which XxxModelTest inherits from) are running on device | ||||
|     "ModelTester::test_pipeline_", | ||||
|     "/repo_utils/", | ||||
| @ -86,10 +83,6 @@ def pytest_configure(config): | ||||
|     config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment") | ||||
|     config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate") | ||||
|     config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu") | ||||
|     config.addinivalue_line("markers", "torch_compile_test: mark test which tests torch compile functionality") | ||||
|     config.addinivalue_line("markers", "torch_export_test: mark test which tests torch export functionality") | ||||
|  | ||||
|     os.environ["DISABLE_SAFETENSORS_CONVERSION"] = "true" | ||||
|  | ||||
|  | ||||
| def pytest_collection_modifyitems(items): | ||||
| @ -134,18 +127,3 @@ class CustomOutputChecker(OutputChecker): | ||||
| doctest.OutputChecker = CustomOutputChecker | ||||
| _pytest.doctest.DoctestModule = HfDoctestModule | ||||
| doctest.DocTestParser = HfDocTestParser | ||||
|  | ||||
| if is_torch_available(): | ||||
|     import torch | ||||
|  | ||||
|     # The flag below controls whether to allow TF32 on cuDNN. This flag defaults to True. | ||||
|     # We set it to `False` for CI. See https://github.com/pytorch/pytorch/issues/157274#issuecomment-3090791615 | ||||
|     torch.backends.cudnn.allow_tf32 = False | ||||
|  | ||||
|     # patch `torch.compile`: if `TORCH_COMPILE_FORCE_FULLGRAPH=1` (or values considered as true, e.g. yes, y, etc.), | ||||
|     # the patched version will always run with `fullgraph=True`. | ||||
|     patch_torch_compile_force_graph() | ||||
|  | ||||
|  | ||||
| if os.environ.get("PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS", "").lower() in ("yes", "true", "on", "y", "1"): | ||||
|     patch_testing_methods_to_collect_info() | ||||
|  | ||||
| @ -1,13 +1,15 @@ | ||||
| FROM python:3.10-slim | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| USER root | ||||
| ARG REF=main | ||||
| RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip install uv && uv pip install --no-cache-dir -U pip setuptools GitPython | ||||
| RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython | ||||
| RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu | ||||
| # tensorflow pin matching setup.py | ||||
| RUN uv pip install --no-cache-dir pypi-kenlm | ||||
| RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[quality,testing,torch-speech,vision]" | ||||
| RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16" | ||||
| RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]" | ||||
| RUN git lfs install | ||||
|  | ||||
| RUN uv pip uninstall transformers | ||||
|  | ||||
| @ -1,10 +1,10 @@ | ||||
| FROM python:3.10-slim | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler git-lfs curl | ||||
| RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
|  | ||||
| RUN wget https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz | ||||
| RUN tar xvf jumanpp-2.0.0-rc3.tar.xz | ||||
| @ -15,20 +15,12 @@ RUN mv catch.hpp ../libs/ | ||||
| RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local | ||||
| RUN make install -j 10 | ||||
|  | ||||
| WORKDIR / | ||||
|  | ||||
| RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,spacy,ftfy,rjieba]" unidic unidic-lite | ||||
| RUN uv pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite | ||||
| # spacy is not used so not tested. Causes to failures. TODO fix later | ||||
| RUN uv run python -m unidic download | ||||
|  | ||||
| # fetch test data and hub objects within CircleCI docker images to reduce even more connections | ||||
| # we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py` | ||||
| # the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers` | ||||
| RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py | ||||
|  | ||||
|  | ||||
| RUN python3 -m unidic download | ||||
| RUN uv pip uninstall transformers | ||||
|  | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
							
								
								
									
										13
									
								
								docker/examples-tf.dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								docker/examples-tf.dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git | ||||
| RUN apt-get install -y g++ cmake | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv venv | ||||
| RUN uv pip install --no-cache-dir -U pip setuptools albumentations seqeval | ||||
| RUN uv pip install  --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]" | ||||
| RUN uv pip install --no-cache-dir  "protobuf==3.20.3" | ||||
| RUN uv pip uninstall transformers | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
| @ -1,19 +1,12 @@ | ||||
| FROM python:3.10-slim | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl | ||||
| RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer | ||||
|  | ||||
| # fetch test data and hub objects within CircleCI docker images to reduce even more connections | ||||
| # we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py` | ||||
| # the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers` | ||||
| RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py | ||||
|  | ||||
|  | ||||
| RUN uv pip uninstall transformers | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| @ -1,24 +1,17 @@ | ||||
| FROM python:3.10-slim | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1 g++ tesseract-ocr git-lfs curl | ||||
| RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN pip --no-cache-dir install uv &&  uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-cache-dir  --no-deps timm accelerate | ||||
| RUN uv pip install -U --no-cache-dir pytesseract python-Levenshtein opencv-python nltk | ||||
| RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk | ||||
| # RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels | ||||
| RUN uv pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose'  'dataset' | ||||
| # RUN git clone https://github.com/facebookresearch/detectron2.git | ||||
| # RUN python3 -m pip install --no-cache-dir -e detectron2 | ||||
| RUN uv pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3' --no-build-isolation | ||||
|  | ||||
| # fetch test data and hub objects within CircleCI docker images to reduce even more connections | ||||
| # we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py` | ||||
| # the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers` | ||||
| RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py | ||||
|  | ||||
|  | ||||
| RUN uv pip uninstall transformers | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
							
								
								
									
										10
									
								
								docker/jax-light.dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								docker/jax-light.dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,10 @@ | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv &&  uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]" | ||||
| RUN uv pip uninstall transformers | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean | ||||
							
								
								
									
										10
									
								
								docker/pipeline-tf.dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								docker/pipeline-tf.dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,10 @@ | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake g++ | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]" | ||||
| RUN uv pip install --no-cache-dir  "protobuf==3.20.3" tensorflow_probability | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
| @ -1,18 +1,11 @@ | ||||
| FROM python:3.10-slim | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg curl | ||||
| RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" | ||||
|  | ||||
| # fetch test data and hub objects within CircleCI docker images to reduce even more connections | ||||
| # we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py` | ||||
| # the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers` | ||||
| RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py | ||||
|  | ||||
|  | ||||
| RUN uv pip uninstall transformers | ||||
|  | ||||
| @ -1,9 +1,9 @@ | ||||
| FROM python:3.10-slim | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update && apt-get install -y time git | ||||
| RUN apt-get update && apt-get install -y time git  | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip install uv | ||||
| RUN pip install uv &&  uv venv | ||||
| RUN uv pip install --no-cache-dir -U pip setuptools GitPython "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ruff]" urllib3 | ||||
| RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
							
								
								
									
										12
									
								
								docker/tf-light.dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								docker/tf-light.dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,12 @@ | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ pkg-config openssh-client git | ||||
| RUN apt-get install -y  cmake | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install  --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]" | ||||
| RUN uv pip install --no-cache-dir  "protobuf==3.20.3" | ||||
| RUN uv pip uninstall transformers | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean | ||||
							
								
								
									
										16
									
								
								docker/torch-jax-light.dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								docker/torch-jax-light.dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,16 @@ | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update &&  apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-deps accelerate | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]" | ||||
|  | ||||
|  | ||||
| # RUN pip install --no-cache-dir "scipy<1.13" "transformers[flax,testing,sentencepiece,flax-speech,vision]" | ||||
|  | ||||
| RUN uv pip uninstall transformers | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean | ||||
| @ -1,17 +1,11 @@ | ||||
| FROM python:3.10-slim | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| USER root | ||||
| RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl | ||||
| RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu | ||||
| RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]" | ||||
|  | ||||
| # fetch test data and hub objects within CircleCI docker images to reduce even more connections | ||||
| # we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py` | ||||
| # the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers` | ||||
| RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py | ||||
|  | ||||
| RUN uv pip uninstall transformers | ||||
|  | ||||
							
								
								
									
										19
									
								
								docker/torch-tf-light.dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								docker/torch-tf-light.dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | ||||
| FROM python:3.9-slim | ||||
| ENV PYTHONDONTWRITEBYTECODE=1 | ||||
| ARG REF=main | ||||
| RUN echo ${REF} | ||||
| USER root | ||||
| RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs | ||||
| ENV UV_PYTHON=/usr/local/bin/python | ||||
| RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools | ||||
| RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu  | ||||
| RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu | ||||
| RUN git lfs install | ||||
|  | ||||
| RUN uv pip install --no-cache-dir pypi-kenlm | ||||
| RUN uv pip install --no-cache-dir  "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,sentencepiece,vision,testing]" | ||||
| RUN uv pip install --no-cache-dir  "protobuf==3.20.3" librosa | ||||
|  | ||||
|  | ||||
| RUN uv pip uninstall transformers | ||||
| RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean | ||||
| @ -9,9 +9,11 @@ SHELL ["sh", "-lc"] | ||||
| # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant | ||||
| # to be used as arguments for docker build (so far). | ||||
|  | ||||
| ARG PYTORCH='2.8.0' | ||||
| ARG PYTORCH='2.7.1' | ||||
| # Example: `cu102`, `cu113`, etc. | ||||
| ARG CUDA='cu126' | ||||
| # Disable kernel mapping for now until all tests pass | ||||
| ENV DISABLE_KERNEL_MAPPING=1 | ||||
|  | ||||
| RUN apt update | ||||
| RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs | ||||
| @ -24,14 +26,11 @@ RUN git clone https://github.com/huggingface/transformers && cd transformers && | ||||
| # 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future. | ||||
| # 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`. | ||||
| #    Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions). | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA && python3 -m pip uninstall -y tensorflow tensorflow_text tensorflow_probability | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir -U timm | ||||
|  | ||||
| RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git || echo "Don't install detectron2 with nightly torch" | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir pytesseract | ||||
| RUN python3 -m pip uninstall -y flax jax | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract | ||||
| RUN python3 -m pip install -U "itsdangerous<2.1.0" | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate | ||||
| @ -40,8 +39,6 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef | ||||
|  | ||||
| # For bettertransformer | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum | ||||
| # For kernels | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels | ||||
|  | ||||
| # For video model testing | ||||
| RUN python3 -m pip install --no-cache-dir av | ||||
| @ -52,14 +49,15 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes | ||||
| # Some tests require quanto | ||||
| RUN python3 -m pip install --no-cache-dir quanto | ||||
|  | ||||
| # After using A10 as CI runner, let's run FA2 tests | ||||
| RUN [ "$PYTORCH" != "pre" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch" | ||||
|  | ||||
| # TODO (ydshieh): check this again | ||||
| # `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests | ||||
| # (`deformable_detr`, `rwkv`, `mra`) | ||||
| RUN python3 -m pip uninstall -y ninja | ||||
|  | ||||
| # For `dinat` model | ||||
| # The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent) | ||||
| # pin `0.17.4` otherwise `cannot import name 'natten2dav' from 'natten.functional'` | ||||
| RUN python3 -m pip install --no-cache-dir natten==0.17.4+torch250cu121 -f https://shi-labs.com/natten/wheels | ||||
|  | ||||
| # For `nougat` tokenizer | ||||
| RUN python3 -m pip install --no-cache-dir python-Levenshtein | ||||
|  | ||||
|  | ||||
| @ -15,8 +15,8 @@ RUN apt update && \ | ||||
| RUN python3 -m pip install --no-cache-dir --upgrade pip && \ | ||||
|     python3 -m pip install --no-cache-dir \ | ||||
|     jupyter \ | ||||
|     tensorflow \ | ||||
|     torch | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels | ||||
|  | ||||
| RUN git clone https://github.com/NVIDIA/apex | ||||
| RUN cd apex && \ | ||||
|  | ||||
| @ -1,71 +0,0 @@ | ||||
| FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu24.04 AS base | ||||
| LABEL maintainer="Hugging Face" | ||||
| SHELL ["/bin/bash", "-c"] | ||||
|  | ||||
| ARG PYTHON_VERSION=3.12 | ||||
| ENV DEBIAN_FRONTEND=noninteractive | ||||
|  | ||||
| RUN apt-get update && \ | ||||
|     apt-get install -y software-properties-common && \ | ||||
|     add-apt-repository -y ppa:deadsnakes/ppa && \ | ||||
|     apt-get update | ||||
|  | ||||
| RUN apt-get update && \ | ||||
|     apt-get -y install \ | ||||
|     apt-utils \ | ||||
|     build-essential \ | ||||
|     ca-certificates \ | ||||
|     clinfo \ | ||||
|     curl \ | ||||
|     git \ | ||||
|     git-lfs \ | ||||
|     vim \ | ||||
|     numactl \ | ||||
|     gnupg2 \ | ||||
|     gpg-agent \ | ||||
|     python3-dev \ | ||||
|     python3-opencv \ | ||||
|     unzip \ | ||||
|     ffmpeg \ | ||||
|     tesseract-ocr \ | ||||
|     espeak-ng \ | ||||
|     wget \ | ||||
|     ncurses-term \ | ||||
|     google-perftools \ | ||||
|     libjemalloc-dev \ | ||||
|     && apt-get clean \ | ||||
|     && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| # Use a virtual env because Ubuntu 24 does not allow pip on the system Python | ||||
| RUN curl -LsSf https://astral.sh/uv/install.sh | sh | ||||
| ENV PATH="/root/.local/bin:$PATH" | ||||
| ENV VIRTUAL_ENV="/opt/venv" | ||||
| ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python | ||||
| RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV} | ||||
| ENV PATH="$VIRTUAL_ENV/bin:$PATH" | ||||
|  | ||||
| RUN pip install --upgrade pip wheel | ||||
| RUN pip install torch torchvision torchaudio torchcodec --index-url https://download.pytorch.org/whl/cpu --no-cache-dir | ||||
| RUN pip install av pyctcdecode pytesseract decord galore-torch fire scipy scikit-learn sentencepiece sentence_transformers sacremoses nltk rouge_score librosa soundfile mpi4py pytorch_msssim | ||||
| RUN pip install onnx optimum onnxruntime | ||||
| RUN pip install autoawq | ||||
| RUN pip install gptqmodel --no-build-isolation | ||||
| RUN pip install -U datasets timm transformers accelerate peft diffusers opencv-python kenlm evaluate | ||||
| RUN pip install -U intel-openmp | ||||
|  | ||||
| # install bitsandbytes | ||||
| RUN git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ && \ | ||||
|     cmake -DCOMPUTE_BACKEND=cpu -S . && make && pip install . && cd ../ | ||||
|  | ||||
| # CPU doesn't need triton | ||||
| RUN pip uninstall triton -y | ||||
|  | ||||
| ENV LD_PRELOAD=${LD_PRELOAD}:/opt/venv/lib/libiomp5.so:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 | ||||
| ENV KMP_AFFINITY=granularity=fine,compact,1,0 | ||||
|  | ||||
| RUN touch /entrypoint.sh | ||||
| RUN chmod +x /entrypoint.sh | ||||
| RUN echo "#!/bin/bash" >> /entrypoint.sh | ||||
| RUN echo "/bin/bash" >> /entrypoint.sh | ||||
|  | ||||
| ENTRYPOINT ["/entrypoint.sh"] | ||||
							
								
								
									
										59
									
								
								docker/transformers-past-gpu/Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								docker/transformers-past-gpu/Dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,59 @@ | ||||
| ARG BASE_DOCKER_IMAGE | ||||
| FROM $BASE_DOCKER_IMAGE | ||||
| LABEL maintainer="Hugging Face" | ||||
|  | ||||
| ARG DEBIAN_FRONTEND=noninteractive | ||||
|  | ||||
| # Use login shell to read variables from `~/.profile` (to pass dynamically created variables between RUN commands) | ||||
| SHELL ["sh", "-lc"] | ||||
|  | ||||
| RUN apt update | ||||
| RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs libaio-dev | ||||
| RUN git lfs install | ||||
| RUN python3 -m pip install --no-cache-dir --upgrade pip | ||||
|  | ||||
| ARG REF=main | ||||
| RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] | ||||
|  | ||||
| # When installing in editable mode, `transformers` is not recognized as a package. | ||||
| # this line must be added in order for python to be aware of transformers. | ||||
| RUN cd transformers && python3 setup.py develop | ||||
|  | ||||
| ARG FRAMEWORK | ||||
| ARG VERSION | ||||
|  | ||||
| # Control `setuptools` version to avoid some issues | ||||
| RUN [ "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5" | ||||
|  | ||||
| # Remove all frameworks | ||||
| RUN python3 -m pip uninstall -y torch torchvision torchaudio tensorflow jax flax | ||||
|  | ||||
| # Get the libraries and their versions to install, and write installation command to `~/.profile`. | ||||
| RUN python3 ./transformers/utils/past_ci_versions.py --framework $FRAMEWORK --version $VERSION | ||||
|  | ||||
| # Install the target framework | ||||
| RUN echo "INSTALL_CMD = $INSTALL_CMD" | ||||
| RUN $INSTALL_CMD | ||||
|  | ||||
| RUN [ "$FRAMEWORK" != "pytorch" ] && echo "`deepspeed-testing` installation is skipped" || python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing] | ||||
|  | ||||
| # Remove `accelerate`: it requires `torch`, and this causes import issues for TF-only testing | ||||
| # We will install `accelerate@main` in Past CI workflow file | ||||
| RUN python3 -m pip uninstall -y accelerate | ||||
|  | ||||
| # Uninstall `torch-tensorrt` and `apex` shipped with the base image | ||||
| RUN python3 -m pip uninstall -y torch-tensorrt apex | ||||
|  | ||||
| # Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout) | ||||
| RUN python3 -m pip uninstall -y deepspeed | ||||
| # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.) | ||||
| # Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010 | ||||
| # RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \ | ||||
| #    DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 | ||||
|  | ||||
| RUN python3 -m pip install -U "itsdangerous<2.1.0" | ||||
|  | ||||
| # When installing in editable mode, `transformers` is not recognized as a package. | ||||
| # this line must be added in order for python to be aware of transformers. | ||||
| RUN cd transformers && python3 setup.py develop | ||||
| @ -1,8 +1,11 @@ | ||||
| FROM rocm/pytorch:rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.7.1 | ||||
| FROM rocm/pytorch:rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.6.0 | ||||
| LABEL maintainer="Hugging Face" | ||||
|  | ||||
| ARG DEBIAN_FRONTEND=noninteractive | ||||
|  | ||||
| ARG TORCH_VISION='0.21.0' | ||||
| ARG TORCH_AUDIO='2.6.0' | ||||
|  | ||||
| RUN apt update && \ | ||||
|     apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg git-lfs && \ | ||||
|     apt clean && \ | ||||
| @ -20,8 +23,10 @@ WORKDIR / | ||||
| ADD https://api.github.com/repos/huggingface/transformers/git/refs/heads/main version.json | ||||
| RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF | ||||
|  | ||||
| # Install transformers | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video,audio] | ||||
| RUN python3 -m pip install --no-cache-dir torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video] | ||||
|  | ||||
| RUN python3 -m pip uninstall -y tensorflow flax | ||||
|  | ||||
| # When installing in editable mode, `transformers` is not recognized as a package. | ||||
| # this line must be added in order for python to be aware of transformers. | ||||
| @ -31,14 +36,4 @@ RUN cd transformers && python3 setup.py develop | ||||
| RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y | ||||
|  | ||||
| # `kernels` may cause many failing tests | ||||
| RUN python3 -m pip uninstall -y kernels | ||||
|  | ||||
| # On ROCm, torchcodec is required to decode audio files and 0.4 or 0.6 fails | ||||
| RUN python3 -m pip install --no-cache-dir "torchcodec==0.5" | ||||
|  | ||||
| # Install flash attention from source. Tested with commit 6387433156558135a998d5568a9d74c1778666d8 | ||||
| RUN git clone https://github.com/ROCm/flash-attention/ -b tridao && \ | ||||
|     cd flash-attention && \ | ||||
|     GPU_ARCHS="gfx942" python setup.py install | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir einops | ||||
| RUN python3 -m pip uninstall -y kernels | ||||
| @ -4,7 +4,7 @@ LABEL maintainer="Hugging Face" | ||||
|  | ||||
| ARG DEBIAN_FRONTEND=noninteractive | ||||
|  | ||||
| ARG PYTORCH='2.8.0' | ||||
| ARG PYTORCH='2.7.1' | ||||
| # Example: `cu102`, `cu113`, etc. | ||||
| ARG CUDA='cu126' | ||||
|  | ||||
| @ -21,7 +21,7 @@ RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'p | ||||
| # Install latest release PyTorch | ||||
| # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.) | ||||
| # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops) | ||||
| RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA | ||||
| RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate | ||||
|  | ||||
|  | ||||
| @ -19,7 +19,7 @@ RUN python3 -m pip uninstall -y torch torchvision torchaudio | ||||
| # Install **nightly** release PyTorch (flag `--pre`) | ||||
| # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.) | ||||
| # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops) | ||||
| RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA | ||||
| RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA | ||||
|  | ||||
| # `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors | ||||
| RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2' | ||||
|  | ||||
| @ -11,7 +11,7 @@ ARG REF=main | ||||
| RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF | ||||
|  | ||||
| # If set to nothing, will install the latest version | ||||
| ARG PYTORCH='2.8.0' | ||||
| ARG PYTORCH='2.7.1' | ||||
| ARG TORCH_VISION='' | ||||
| ARG TORCH_AUDIO='' | ||||
| # Example: `cu102`, `cu113`, etc. | ||||
| @ -25,6 +25,8 @@ RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; | ||||
| RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='TORCH_VISION'.*' ||  VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA | ||||
| RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' ||  VERSION='torchaudio'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA | ||||
|  | ||||
| RUN python3 -m pip uninstall -y tensorflow flax | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract | ||||
| RUN python3 -m pip install -U "itsdangerous<2.1.0" | ||||
|  | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04 | ||||
| FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 | ||||
| LABEL maintainer="Hugging Face" | ||||
|  | ||||
| ARG DEBIAN_FRONTEND=noninteractive | ||||
| @ -9,9 +9,11 @@ SHELL ["sh", "-lc"] | ||||
| # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant | ||||
| # to be used as arguments for docker build (so far). | ||||
|  | ||||
| ARG PYTORCH='2.8.0' | ||||
| ARG PYTORCH='2.6.0' | ||||
| # Example: `cu102`, `cu113`, etc. | ||||
| ARG CUDA='cu126' | ||||
| ARG CUDA='cu121' | ||||
| # Disable kernel mapping for quantization tests | ||||
| ENV DISABLE_KERNEL_MAPPING=1 | ||||
|  | ||||
| RUN apt update | ||||
| RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg | ||||
| @ -24,24 +26,35 @@ RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; | ||||
| RUN echo torch=$VERSION | ||||
| # `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build. | ||||
| # Currently, let's just use their latest releases (when `torch` is installed with a release version) | ||||
| RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA | ||||
| RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate | ||||
|  | ||||
| # needed in bnb and awq | ||||
| RUN python3 -m pip install --no-cache-dir einops | ||||
|  | ||||
| # Add bitsandbytes for mixed int8 testing | ||||
| RUN python3 -m pip install --no-cache-dir bitsandbytes | ||||
|  | ||||
| # Add gptqmodel for gptq quantization testing, installed from source for pytorch==2.6.0 compatibility | ||||
| RUN python3 -m pip install lm_eval | ||||
| RUN git clone https://github.com/ModelCloud/GPTQModel.git && cd GPTQModel && pip install -v . --no-build-isolation | ||||
|  | ||||
| # Add optimum for gptq quantization testing | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum | ||||
|  | ||||
| # Add PEFT | ||||
| RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/peft@main#egg=peft | ||||
|  | ||||
| # needed in bnb and awq | ||||
| RUN python3 -m pip install --no-cache-dir einops | ||||
| # Add aqlm for quantization testing | ||||
| RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2 | ||||
|  | ||||
| # Add bitsandbytes | ||||
| RUN python3 -m pip install --no-cache-dir bitsandbytes | ||||
| # Add vptq for quantization testing | ||||
| RUN pip install vptq | ||||
|  | ||||
| # # Add gptqmodel | ||||
| # RUN python3 -m pip install --no-cache-dir gptqmodel | ||||
| # Add spqr for quantization testing | ||||
| # Commented out for now: pip reports "No matching distribution found"; we need to reach out to the authors | ||||
| # RUN python3 -m pip install --no-cache-dir spqr_quant[gpu] | ||||
|  | ||||
| # Add hqq for quantization testing | ||||
| RUN python3 -m pip install --no-cache-dir hqq | ||||
| @ -50,11 +63,21 @@ RUN python3 -m pip install --no-cache-dir hqq | ||||
| RUN python3 -m pip install --no-cache-dir gguf | ||||
|  | ||||
| # Add autoawq for quantization testing | ||||
| # New release v0.2.8 | ||||
| RUN python3 -m pip install --no-cache-dir autoawq[kernels] | ||||
|  | ||||
| # Add quanto for quantization testing | ||||
| RUN python3 -m pip install --no-cache-dir optimum-quanto | ||||
|  | ||||
| # Add eetq for quantization testing | ||||
| RUN git clone https://github.com/NetEase-FuXi/EETQ.git && cd EETQ/ && git submodule update --init --recursive && pip install . | ||||
|  | ||||
| # # Add flute-kernel and fast_hadamard_transform for quantization testing | ||||
| # # Commented for now as they cause issues with the build | ||||
| # # TODO: create a new workflow to test them | ||||
| # RUN python3 -m pip install --no-cache-dir flute-kernel==0.4.1 | ||||
| # RUN python3 -m pip install --no-cache-dir git+https://github.com/Dao-AILab/fast-hadamard-transform.git | ||||
|  | ||||
| # Add compressed-tensors for quantization testing | ||||
| RUN python3 -m pip install --no-cache-dir compressed-tensors | ||||
|  | ||||
| @ -62,10 +85,7 @@ RUN python3 -m pip install --no-cache-dir compressed-tensors | ||||
| RUN python3 -m pip install --no-cache-dir amd-quark | ||||
|  | ||||
| # Add AutoRound for quantization testing | ||||
| RUN python3 -m pip install --no-cache-dir auto-round | ||||
|  | ||||
| # Add torchao for quantization testing | ||||
| RUN python3 -m pip install --no-cache-dir torchao | ||||
| RUN python3 -m pip install --no-cache-dir "auto-round>=0.5.0" | ||||
|  | ||||
| # Add transformers in editable mode | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch] | ||||
| @ -79,27 +99,3 @@ RUN python3 -m pip uninstall -y flash-attn | ||||
| # When installing in editable mode, `transformers` is not recognized as a package. | ||||
| # this line must be added in order for python to be aware of transformers. | ||||
| RUN cd transformers && python3 setup.py develop | ||||
|  | ||||
| # Add fp-quant for quantization testing | ||||
| RUN python3 -m pip install --no-cache-dir "fp-quant>=0.2.0" | ||||
|  | ||||
| # Low usage or incompatible lib, will enable later on | ||||
|  | ||||
| # # Add aqlm for quantization testing | ||||
| # RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2 | ||||
|  | ||||
| # # Add vptq for quantization testing | ||||
| # RUN pip install vptq | ||||
|  | ||||
| # Add spqr for quantization testing | ||||
| # Commented out for now: pip reports "No matching distribution found"; we need to reach out to the authors | ||||
| # RUN python3 -m pip install --no-cache-dir spqr_quant[gpu] | ||||
|  | ||||
| # # Add eetq for quantization testing | ||||
| # RUN git clone https://github.com/NetEase-FuXi/EETQ.git && cd EETQ/ && git submodule update --init --recursive && pip install . | ||||
|  | ||||
| # # Add flute-kernel and fast_hadamard_transform for quantization testing | ||||
| # # Commented for now as they cause issues with the build | ||||
| # # TODO: create a new workflow to test them | ||||
| # RUN python3 -m pip install --no-cache-dir flute-kernel==0.4.1 | ||||
| # RUN python3 -m pip install --no-cache-dir git+https://github.com/Dao-AILab/fast-hadamard-transform.git | ||||
|  | ||||
							
								
								
									
										25
									
								
								docker/transformers-tensorflow-gpu/Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								docker/transformers-tensorflow-gpu/Dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,25 @@ | ||||
| FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 | ||||
| LABEL maintainer="Hugging Face" | ||||
|  | ||||
| ARG DEBIAN_FRONTEND=noninteractive | ||||
|  | ||||
| RUN apt update | ||||
| RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg | ||||
| RUN python3 -m pip install --no-cache-dir --upgrade pip | ||||
|  | ||||
| ARG REF=main | ||||
| RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF | ||||
| RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-tensorflow,testing] | ||||
|  | ||||
| # If set to nothing, will install the latest version | ||||
| ARG TENSORFLOW='2.13' | ||||
|  | ||||
| RUN [ ${#TENSORFLOW} -gt 0 ] && VERSION='tensorflow=='$TENSORFLOW'.*' ||  VERSION='tensorflow'; python3 -m pip install --no-cache-dir -U $VERSION | ||||
| RUN python3 -m pip uninstall -y torch flax | ||||
| RUN python3 -m pip install -U "itsdangerous<2.1.0" | ||||
|  | ||||
| RUN python3 -m pip install --no-cache-dir -U "tensorflow_probability<0.22" | ||||
|  | ||||
| # When installing in editable mode, `transformers` is not recognized as a package. | ||||
| # this line must be added in order for python to be aware of transformers. | ||||
| RUN cd transformers && python3 setup.py develop | ||||
| @ -20,21 +20,22 @@ To generate the documentation, you first have to build it. Several packages are | ||||
| you can install them with the following command, at the root of the code repository: | ||||
|  | ||||
| ```bash | ||||
| pip install -e ".[dev]" | ||||
| pip install -e ".[docs]" | ||||
| ``` | ||||
|  | ||||
| > [!NOTE] | ||||
| > This command might fail on operating systems that are missing dependencies. Check step 4 in [Create a Pull Request](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#create-a-pull-request) to work around it. | ||||
|  | ||||
| Then you need to install our special tool that builds the documentation: | ||||
|  | ||||
| ```bash | ||||
| pip install git+https://github.com/huggingface/doc-builder | ||||
| ``` | ||||
|  | ||||
| > [!NOTE] | ||||
| > You only need to generate the documentation to inspect it locally (if you're planning changes and want to | ||||
| > check how they look before committing for instance). You don't have to commit the built documentation. | ||||
| --- | ||||
| **NOTE** | ||||
|  | ||||
| You only need to generate the documentation to inspect it locally (if you're planning changes and want to | ||||
| check how they look before committing for instance). You don't have to commit the built documentation. | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Building the documentation | ||||
|  | ||||
| @ -71,8 +72,12 @@ doc-builder preview transformers docs/source/en/ | ||||
|  | ||||
| The docs will be viewable at [http://localhost:3000](http://localhost:3000). You can also preview the docs once you have opened a PR. You will see a bot add a comment to a link where the documentation with your changes lives. | ||||
|  | ||||
| > [!NOTE] | ||||
| > The `preview` command only works with existing doc files. When you add a completely new file, you need to update `_toctree.yml` & restart `preview` command (`ctrl-c` to stop it & call `doc-builder preview ...` again). | ||||
| --- | ||||
| **NOTE** | ||||
|  | ||||
| The `preview` command only works with existing doc files. When you add a completely new file, you need to update `_toctree.yml` & restart `preview` command (`ctrl-c` to stop it & call `doc-builder preview ...` again). | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Adding a new element to the navigation bar | ||||
|  | ||||
| @ -159,9 +164,6 @@ These classes should be added using our Markdown syntax. Usually as follows: | ||||
| [[autodoc]] XXXConfig | ||||
| ``` | ||||
|  | ||||
| > [!IMPORTANT] | ||||
| > Always add a blank line after `[[autodoc]]` to ensure it passes the CI/CD checks. | ||||
|  | ||||
| This will include every public method of the configuration that is documented. If for some reason you wish for a method | ||||
| not to be displayed in the documentation, you can do so by specifying which methods should be in the docs: | ||||
|  | ||||
|  | ||||
| @ -50,7 +50,7 @@ Begin translating the text! | ||||
|  | ||||
| 1. Start with the `_toctree.yml` file that corresponds to your documentation chapter. This file is essential for rendering the table of contents on the website. | ||||
|  | ||||
|     - If the `_toctree.yml` file doesn't exist for your language, create one by copying the English version and removing unrelated sections. | ||||
|     - If the `_toctree.yml` file doesn’t exist for your language, create one by copying the English version and removing unrelated sections. | ||||
|     - Ensure it is placed in the `docs/source/LANG-ID/` directory. | ||||
|  | ||||
|     Here’s an example structure for the `_toctree.yml` file: | ||||
|  | ||||
| @ -123,6 +123,8 @@ | ||||
|     title: تشغيل التدريب على Amazon SageMaker | ||||
|   - local: serialization | ||||
|     title: التصدير إلى ONNX | ||||
|   - local: tflite | ||||
|     title: التصدير إلى TFLite | ||||
|   - local: torchscript | ||||
|     title: التصدير إلى TorchScript | ||||
|   - local: notebooks | ||||
| @ -182,6 +184,8 @@ | ||||
| #       title: التدريب الفعال على وحدة المعالجة المركزية (CPU) | ||||
| #     - local: perf_train_cpu_many | ||||
| #       title: التدريب الموزع لوحدة المعالجة المركزية (CPU) | ||||
| #     - local: perf_train_tpu_tf | ||||
| #       title: التدريب على (TPU) باستخدام TensorFlow | ||||
| #     - local: perf_train_special | ||||
| #       title: تدريب PyTorch على Apple silicon | ||||
| #     - local: perf_hardware | ||||
| @ -199,6 +203,8 @@ | ||||
| #     title: إنشاء نموذج كبير | ||||
| #   - local: debugging | ||||
| #     title: تصحيح الأخطاء البرمجية | ||||
| #   - local: tf_xla | ||||
| #     title: تكامل XLA لنماذج TensorFlow | ||||
| #   - local: perf_torch_compile | ||||
| #     title: تحسين الاستدلال باستخدام `torch.compile()` | ||||
| #   title: الأداء وقابلية التوسع | ||||
| @ -254,6 +260,8 @@ | ||||
| #       title: التكوين | ||||
| #     - local: main_classes/data_collator | ||||
| #       title: مجمع البيانات | ||||
| #     - local: main_classes/keras_callbacks | ||||
| #       title: استدعاءات Keras | ||||
| #     - local: main_classes/logging | ||||
| #       title: التسجيل | ||||
| #     - local: main_classes/model | ||||
|  | ||||
| @ -52,7 +52,7 @@ | ||||
|     <figcaption class="mt-2 text-center text-sm text-gray-500">الصورة توضح مخطط مراحل نموذج Swin.</figcaption> | ||||
| </div> | ||||
|  | ||||
| يسمح لك [`AutoBackbone`] باستخدام النماذج المُدربة مسبقًا كعمود فقري للحصول على خرائط ميزات من مراحل مختلفة من العمود الفقري. يجب عليك تحديد أحد المعلمات التالية في [`~PreTrainedConfig.from_pretrained`]: | ||||
| يسمح لك [`AutoBackbone`] باستخدام النماذج المُدربة مسبقًا كعمود فقري للحصول على خرائط ميزات من مراحل مختلفة من العمود الفقري. يجب عليك تحديد أحد المعلمات التالية في [`~PretrainedConfig.from_pretrained`]: | ||||
|  | ||||
| * `out_indices` هو فهرس الطبقة التي تريد الحصول على خريطة الميزات منها | ||||
| * `out_features` هو اسم الطبقة التي تريد الحصول على خريطة الميزات منها | ||||
| @ -115,6 +115,8 @@ | ||||
|  | ||||
| ## النموذج التلقائي (AutoModel) | ||||
|  | ||||
| <frameworkcontent> | ||||
| <pt> | ||||
| تسمح لك فئات `AutoModelFor` بتحميل نموذج مُدرب مسبقًا لمهمة معينة (راجع [هنا](model_doc/auto) للحصول على قائمة كاملة بالمهام المتاحة). على سبيل المثال، قم بتحميل نموذج لتصنيف التسلسل باستخدام [`AutoModelForSequenceClassification.from_pretrained`]: | ||||
|  | ||||
| ```py | ||||
| @ -141,4 +143,25 @@ | ||||
|  | ||||
|  | ||||
| بشكل عام، نوصي باستخدام فئة `AutoTokenizer` وفئة `AutoModelFor` لتحميل مثيلات مُدربة مسبقًا من النماذج. سيساعدك هذا في تحميل البنية الصحيحة في كل مرة. في البرنامج التعليمي التالي، تعرف على كيفية استخدام المحلل اللغوي ومعالج الصور ومستخرج الميزات والمعالج الذي تم تحميله حديثًا لمعالجة مجموعة بيانات للضبط الدقيق. | ||||
| </pt> | ||||
|  | ||||
| <tf> | ||||
| أخيرًا، تسمح لك فئات `TFAutoModelFor` بتحميل نموذج مُدرب مسبقًا لمهمة معينة (راجع [هنا](model_doc/auto) للحصول على قائمة كاملة بالمهام المتاحة). على سبيل المثال، قم بتحميل نموذج لتصنيف التسلسل باستخدام [`TFAutoModelForSequenceClassification.from_pretrained`]: | ||||
|  | ||||
| ```py | ||||
| >>> from transformers import TFAutoModelForSequenceClassification | ||||
|  | ||||
| >>> model = TFAutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased") | ||||
| ``` | ||||
|  | ||||
| أعد استخدام نفس نقطة التفتيش لتحميل بنية لمهمة مختلفة: | ||||
|  | ||||
| ```py | ||||
| >>> from transformers import TFAutoModelForTokenClassification | ||||
|  | ||||
| >>> model = TFAutoModelForTokenClassification.from_pretrained("distilbert/distilbert-base-uncased") | ||||
| ``` | ||||
|  | ||||
| بشكل عام، نوصي باستخدام فئة `AutoTokenizer` وفئة `TFAutoModelFor` لتحميل نسخ لنماذج مُدربة مسبقًا. سيساعدك هذا في تحميل البنية الصحيحة في كل مرة. في البرنامج التعليمي التالي، ستتعرف على كيفية استخدام المُجزّئ اللغوي ومعالج الصور ومستخرج الميزات والمعالج الذي تم تحميله حديثًا لمعالجة مجموعة بيانات للضبط الدقيق. | ||||
| </tf> | ||||
| </frameworkcontent> | ||||
|  | ||||
| @ -304,7 +304,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer | ||||
| checkpoint = "NousResearch/Hermes-2-Pro-Llama-3-8B" | ||||
|  | ||||
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | ||||
| model = AutoModelForCausalLM.from_pretrained(checkpoint, dtype=torch.bfloat16, device_map="auto") | ||||
| model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map="auto") | ||||
|  | ||||
| ```python | ||||
| messages = [ | ||||
|  | ||||
| @ -25,7 +25,7 @@ chat = [ | ||||
| import torch | ||||
| from transformers import pipeline | ||||
|  | ||||
| pipe = pipeline("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", dtype=torch.bfloat16, device_map="auto") | ||||
| pipe = pipeline("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto") | ||||
| response = pipe(chat, max_new_tokens=512) | ||||
| print(response[0]['generated_text'][-1]['content']) | ||||
| ``` | ||||
| @ -126,7 +126,7 @@ chat = [ | ||||
| ] | ||||
|  | ||||
| # 1: تحميل النموذج والمحلل | ||||
| model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", dtype=torch.bfloat16) | ||||
| model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", torch_dtype=torch.bfloat16) | ||||
| tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") | ||||
|  | ||||
| # 2: تطبيق قالب الدردشة | ||||
| @ -164,7 +164,7 @@ print("Decoded output:\n", decoded_output) | ||||
|  | ||||
| ### اعتبارات الذاكرة | ||||
|  | ||||
| بشكل افتراضي، تقوم فئات Hugging Face مثل [`TextGenerationPipeline`] أو [`AutoModelForCausalLM`] بتحميل النموذج في دقة "float32". وهذا يعني أنه يحتاج إلى 4 بايتات (32 بت) لكل معلمة، لذا فإن نموذج "8B" بحجم 8 مليار معلمة سيحتاج إلى ~32 جيجابايت من الذاكرة. ومع ذلك، يمكن أن يكون هذا مضيعة للموارد! يتم تدريب معظم نماذج اللغة الحديثة في دقة "bfloat16"، والتي تستخدم فقط 2 بايت لكل معلمة. إذا كان عتادك يدعم ذلك (Nvidia 30xx/Axxx أو أحدث)، فيمكنك تحميل النموذج في دقة "bfloat16"، باستخدام معامل "dtype" كما فعلنا أعلاه. | ||||
| بشكل افتراضي، تقوم فئات Hugging Face مثل [`TextGenerationPipeline`] أو [`AutoModelForCausalLM`] بتحميل النموذج في دقة "float32". وهذا يعني أنه يحتاج إلى 4 بايتات (32 بت) لكل معلمة، لذا فإن نموذج "8B" بحجم 8 مليار معلمة سيحتاج إلى ~32 جيجابايت من الذاكرة. ومع ذلك، يمكن أن يكون هذا مضيعة للموارد! يتم تدريب معظم نماذج اللغة الحديثة في دقة "bfloat16"، والتي تستخدم فقط 2 بايت لكل معلمة. إذا كان عتادك يدعم ذلك (Nvidia 30xx/Axxx أو أحدث)، فيمكنك تحميل النموذج في دقة "bfloat16"، باستخدام معامل "torch_dtype" كما فعلنا أعلاه. | ||||
|  | ||||
| ومن الممكن أيضًا النزول إلى أقل من 16 بت باستخدام "التكميم"، وهي طريقة لضغط أوزان النموذج بطريقة تفقد بعض المعلومات. يسمح هذا بضغط كل معلمة إلى 8 بتات أو 4 بتات أو حتى أقل. لاحظ أنه، خاصة في 4 بتات، قد تتأثر جودة ناتج النموذج سلبًا، ولكن غالبًا ما يكون هذا مقايضة تستحق القيام بها لتناسب نموذج محادثة أكبر وأكثر قدرة في الذاكرة. دعنا نرى كيف يمكننا تطبيق ذلك باستخدام مكتبة `bitsandbytes`: | ||||
|  | ||||
|  | ||||
| @ -54,19 +54,19 @@ DistilBertConfig { | ||||
|   | ||||
| ``` | ||||
|  | ||||
| يمكن تعديل خصائص النموذج المدرب مسبقًا في دالة [`~PreTrainedConfig.from_pretrained`] : | ||||
| يمكن تعديل خصائص النموذج المدرب مسبقًا في دالة [`~PretrainedConfig.from_pretrained`] : | ||||
|  | ||||
| ```py | ||||
| >>> my_config = DistilBertConfig.from_pretrained("distilbert/distilbert-base-uncased", activation="relu", attention_dropout=0.4) | ||||
| ``` | ||||
|  | ||||
| بمجرد أن تصبح راضيًا عن تكوين نموذجك، يمكنك حفظه باستخدام [`~PreTrainedConfig.save_pretrained`]. يتم تخزين ملف التكوين الخاص بك على أنه ملف JSON في دليل الحفظ المحدد: | ||||
| بمجرد أن تصبح راضيًا عن تكوين نموذجك، يمكنك حفظه باستخدام [`~PretrainedConfig.save_pretrained`]. يتم تخزين ملف التكوين الخاص بك على أنه ملف JSON في دليل الحفظ المحدد: | ||||
|  | ||||
| ```py | ||||
| >>> my_config.save_pretrained(save_directory="./your_model_save_path") | ||||
| ``` | ||||
|  | ||||
| لإعادة استخدام ملف التكوين، قم بتحميله باستخدام [`~PreTrainedConfig.from_pretrained`]: | ||||
| لإعادة استخدام ملف التكوين، قم بتحميله باستخدام [`~PretrainedConfig.from_pretrained`]: | ||||
|  | ||||
| ```py | ||||
| >>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/config.json") | ||||
| @ -81,6 +81,8 @@ DistilBertConfig { | ||||
|  | ||||
| الخطوة التالية هي إنشاء [نموذج](main_classes/models). النموذج - ويُشار إليه أحيانًا باسم البنية - يُحدد وظيفة كل طبقة والعمليات الحسابية المُنفذة. تُستخدم خصائص مثل `num_hidden_layers` من التكوين لتحديد هذه البنية. تشترك جميع النماذج في  فئة أساسية واحدة هي [`PreTrainedModel`] وبعض الوظائف المُشتركة مثل تغيير حجم مُدخلات الكلمات وتقليص رؤوس آلية الانتباه الذاتي. بالإضافة إلى ذلك، فإن جميع النماذج هي  فئات فرعية إما من [`torch.nn.Module`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html)، [`tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) أو [`flax.linen.Module`](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) . هذا يعني أن النماذج متوافقة مع كل استخدام لإطار العمل الخاص بها. | ||||
|  | ||||
| <frameworkcontent> | ||||
| <pt> | ||||
| قم بتحميل خصائص التكوين المخصصة الخاصة بك في النموذج: | ||||
|  | ||||
| ```py | ||||
| @ -103,11 +105,39 @@ DistilBertConfig { | ||||
| ```py | ||||
| >>> model = DistilBertModel.from_pretrained("distilbert/distilbert-base-uncased", config=my_config) | ||||
| ``` | ||||
| </pt> | ||||
| <tf> | ||||
| قم بتحميل خصائص التكوين المُخصصة الخاصة بك في النموذج: | ||||
|  | ||||
| ```py | ||||
| >>> from transformers import TFDistilBertModel | ||||
|  | ||||
| >>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/my_config.json") | ||||
| >>> tf_model = TFDistilBertModel(my_config) | ||||
| ``` | ||||
|  | ||||
| هذا ينشئ نموذجًا بقيم عشوائية بدلاً من الأوزان المُدربة مسبقًا. لن يكون هذا النموذج مفيدًا حتى يتم تدريبه. تُعد عملية التدريب مكلفة وتستغرق وقتًا طويلاً. من الأفضل بشكل عام استخدام نموذج مُدرب مسبقًا للحصول على نتائج أفضل بشكل أسرع، مع استخدام جزء بسيط فقط من الموارد المطلوبة للتدريب. | ||||
|  | ||||
| قم بإنشاء نموذج مُدرب مسبقًا باستخدام [`~TFPreTrainedModel.from_pretrained`]: | ||||
|  | ||||
| ```py | ||||
| >>> tf_model = TFDistilBertModel.from_pretrained("distilbert/distilbert-base-uncased") | ||||
| ``` | ||||
|  | ||||
| عندما تقوم بتحميل الأوزان المُدربة مسبقًا،يتم تحميل إعدادات النموذج الافتراضي تلقائيًا إذا كان النموذج من مكتبة 🤗 Transformers. ومع ذلك، يمكنك أيضًا استبدال - بعض أو كل - إعدادات النموذج  الافتراضية بإعداداتك الخاصة: | ||||
|  | ||||
| ```py | ||||
| >>> tf_model = TFDistilBertModel.from_pretrained("distilbert/distilbert-base-uncased", config=my_config) | ||||
| ``` | ||||
| </tf> | ||||
| </frameworkcontent> | ||||
|  | ||||
| ### رؤوس النموذج | ||||
|  | ||||
| في هذه المرحلة، لديك نموذج DistilBERT الأساسي الذي يخرج *حالات الكامنة*. تُمرَّر هذه الحالات الكامنة كمدخلات لرأس النموذج لإنتاج  المخرجات النهائية. توفر مكتبة 🤗 Transformers رأس نموذج مختلف لكل مهمة طالما أن النموذج يدعم المهمة (أي لا يمكنك استخدام DistilBERT لمهمة تسلسل إلى تسلسل مثل الترجمة). | ||||
|  | ||||
| <frameworkcontent> | ||||
| <pt> | ||||
| على سبيل المثال، [`DistilBertForSequenceClassification`] هو نموذج DistilBERT الأساس  مزودًا برأس تصنيف تسلسلي.  يُشكّل رأس التصنيف التسلسلي طبقة خطية فوق المخرجات المجمعة. | ||||
|  | ||||
| ```py | ||||
| @ -123,6 +153,25 @@ DistilBertConfig { | ||||
|  | ||||
| >>> model = DistilBertForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased") | ||||
| ``` | ||||
| </pt> | ||||
| <tf> | ||||
| على سبيل المثال، [`TFDistilBertForSequenceClassification`] هو نموذج DistilBERT الأساسي برأس تصنيف تسلسل. رأس التصنيف التسلسلي هو طبقة خطية أعلى المخرجات المجمعة. | ||||
|  | ||||
| ```py | ||||
| >>> from transformers import TFDistilBertForSequenceClassification | ||||
|  | ||||
| >>> tf_model = TFDistilBertForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased") | ||||
| ``` | ||||
|  | ||||
| أعد استخدام هذا نقطة التحقق لمهمة أخرى عن طريق التبديل إلى رأس نموذج مختلف. لمهمة الإجابة على الأسئلة، ستستخدم رأس النموذج [`TFDistilBertForQuestionAnswering`]. رأس الإجابة على الأسئلة مشابه لرأس التصنيف التسلسلي باستثناء أنه طبقة خطية أعلى حالات الإخراج المخفية. | ||||
|  | ||||
| ```py | ||||
| >>> from transformers import TFDistilBertForQuestionAnswering | ||||
|  | ||||
| >>> tf_model = TFDistilBertForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased") | ||||
| ``` | ||||
| </tf> | ||||
| </frameworkcontent> | ||||
|  | ||||
| ## مجزئ النصوص | ||||
|  | ||||
|  | ||||
| @ -20,11 +20,11 @@ | ||||
| في مثالنا، سنعدّل بعض الوسائط في فئة ResNet التي قد نرغب في ضبطها. ستعطينا التكوينات المختلفة أنواع ResNets المختلفة الممكنة. سنقوم بتخزين هذه الوسائط بعد التحقق من صحته. | ||||
|  | ||||
| ```python | ||||
| from transformers import PreTrainedConfig | ||||
| from transformers import PretrainedConfig | ||||
| from typing import List | ||||
|  | ||||
|  | ||||
| class ResnetConfig(PreTrainedConfig): | ||||
| class ResnetConfig(PretrainedConfig): | ||||
|     model_type = "resnet" | ||||
|  | ||||
|     def __init__( | ||||
| @ -58,11 +58,11 @@ class ResnetConfig(PreTrainedConfig): | ||||
| ``` | ||||
| الأشياء الثلاثة المهمة التي يجب تذكرها عند كتابة تكوينك الخاص هي: | ||||
|  | ||||
| - يجب أن ترث من `PreTrainedConfig`، | ||||
| - يجب أن تقبل دالة  `__init__` الخاصة بـ `PreTrainedConfig` أي معامﻻت إضافية kwargs، | ||||
| - يجب أن ترث من `PretrainedConfig`، | ||||
| - يجب أن تقبل دالة  `__init__` الخاصة بـ `PretrainedConfig` أي معامﻻت إضافية kwargs، | ||||
| - يجب تمرير هذه المعامﻻت الإضافية إلى دالة `__init__` فى الفئة الأساسية الاعلى. | ||||
|  | ||||
| يضمن الإرث حصولك على جميع الوظائف من مكتبة 🤗 Transformers، في حين أن القيدين التانى والثالث يأتيان من حقيقة أن `PreTrainedConfig` لديه المزيد من الحقول أكثر من تلك التي تقوم بتعيينها. عند إعادة تحميل تكوين باستخدام طريقة `from_pretrained`، يجب أن يقبل تكوينك هذه الحقول ثم إرسالها إلى الفئة الأساسية الأعلى. | ||||
| يضمن الإرث حصولك على جميع الوظائف من مكتبة 🤗 Transformers، في حين أن القيدين التانى والثالث يأتيان من حقيقة أن `PretrainedConfig` لديه المزيد من الحقول أكثر من تلك التي تقوم بتعيينها. عند إعادة تحميل تكوين باستخدام طريقة `from_pretrained`، يجب أن يقبل تكوينك هذه الحقول ثم إرسالها إلى الفئة الأساسية الأعلى. | ||||
|  | ||||
| تحديد `model_type` لتكوينك (هنا `model_type="resnet"`) ليس إلزاميًا، ما لم ترغب في | ||||
| تسجيل نموذجك باستخدام الفئات التلقائية (راجع القسم الأخير). | ||||
| @ -82,7 +82,7 @@ resnet50d_config.save_pretrained("custom-resnet") | ||||
| resnet50d_config = ResnetConfig.from_pretrained("custom-resnet") | ||||
| ``` | ||||
|  | ||||
| يمكنك أيضًا استخدام أي طريقة أخرى من فئة [`PreTrainedConfig`]، مثل [`~PreTrainedConfig.push_to_hub`] لتحميل تكوينك مباشرة إلى Hub. | ||||
| يمكنك أيضًا استخدام أي طريقة أخرى من فئة [`PretrainedConfig`]، مثل [`~PretrainedConfig.push_to_hub`] لتحميل تكوينك مباشرة إلى Hub. | ||||
|  | ||||
| ## كتابة نموذج مخصص | ||||
|  | ||||
| @ -280,7 +280,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict()) | ||||
| الآن لإرسال النموذج إلى Hub، تأكد من تسجيل الدخول. إما تشغيل في المحطة الأوامر الطرفية الخاصة بك: | ||||
|  | ||||
| ```bash | ||||
| hf auth login | ||||
| huggingface-cli login | ||||
| ``` | ||||
|  | ||||
| أو من دفتر ملاحظات: | ||||
|  | ||||
| @ -60,10 +60,10 @@ pip install transformers bitsandbytes>=0.39.0 -q | ||||
| أولاً، تحتاج إلى تحميل النموذج. | ||||
|  | ||||
| ```py | ||||
| >>> from transformers import AutoModelForCausalLM, BitsAndBytesConfig | ||||
| >>> from transformers import AutoModelForCausalLM | ||||
|  | ||||
| >>> model = AutoModelForCausalLM.from_pretrained( | ||||
| ...     "mistralai/Mistral-7B-v0.1", device_map="auto", quantization_config=BitsAndBytesConfig(load_in_4bit=True) | ||||
| ...     "mistralai/Mistral-7B-v0.1", device_map="auto", load_in_4bit=True | ||||
| ... ) | ||||
| ``` | ||||
|  | ||||
| @ -113,12 +113,12 @@ pip install transformers bitsandbytes>=0.39.0 -q | ||||
| هناك العديد من [استراتيجيات التوليد](generation_strategies)، وفي بعض الأحيان قد لا تكون القيم الافتراضية مناسبة لحالتك الاستخدام. إذا لم تكن الإخراج الخاصة بك متوافقة مع ما تتوقعه، فقد قمنا بإنشاء قائمة بأكثر الأخطاء الشائعة وكيفية تجنبها. | ||||
|  | ||||
| ```py | ||||
| >>> from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | ||||
| >>> from transformers import AutoModelForCausalLM, AutoTokenizer | ||||
|  | ||||
| >>> tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1") | ||||
| >>> tokenizer.pad_token = tokenizer.eos_token  # Most LLMs don't have a pad token by default | ||||
| >>> model = AutoModelForCausalLM.from_pretrained( | ||||
| ...     "mistralai/Mistral-7B-v0.1", device_map="auto", quantization_config=BitsAndBytesConfig(load_in_4bit=True) | ||||
| ...     "mistralai/Mistral-7B-v0.1", device_map="auto", load_in_4bit=True | ||||
| ... ) | ||||
| ``` | ||||
|  | ||||
| @ -192,7 +192,7 @@ LLMs هي [معماريات فك التشفير فقط](https://huggingface.co/l | ||||
| ```python | ||||
| >>> tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha") | ||||
| >>> model = AutoModelForCausalLM.from_pretrained( | ||||
| ...     "HuggingFaceH4/zephyr-7b-alpha", device_map="auto", quantization_config=BitsAndBytesConfig(load_in_4bit=True) | ||||
| ...     "HuggingFaceH4/zephyr-7b-alpha", device_map="auto", load_in_4bit=True | ||||
| ... ) | ||||
| >>> set_seed(0) | ||||
| >>> prompt = """How many helicopters can a human eat in one sitting? Reply as a thug.""" | ||||
|  | ||||
| @ -13,11 +13,11 @@ | ||||
|  | ||||
| في هذا الدليل، سنستعرض التقنيات الفعالة لتُحسِّن من كفاءة نشر نماذج اللغة الكبيرة: | ||||
|  | ||||
| 1. سنتناول تقنية "دقة أقل" التي أثبتت الأبحاث فعاليتها في تحقيق مزايا حسابية دون التأثير بشكل ملحوظ على أداء النموذج عن طريق العمل بدقة رقمية أقل [8 بت و4 بت](/main_classes/quantization). | ||||
| 1. سنتناول تقنية "دقة أقل" التي أثبتت الأبحاث فعاليتها في تحقيق مزايا حسابية دون التأثير بشكل ملحوظ على أداء النموذج عن طريق العمل بدقة رقمية أقل [8 بت و4 بت](/main_classes/quantization.md). | ||||
|  | ||||
| 2.  **Flash Attention:** إن Flash Attention هي نسخة مُعدَّلة من خوارزمية الانتباه التي لا توفر فقط نهجًا أكثر كفاءة في استخدام الذاكرة، ولكنها تحقق أيضًا كفاءة متزايدة بسبب الاستخدام الأمثل لذاكرة GPU. | ||||
|  | ||||
| 3.  **الابتكارات المعمارية:** حيث تم اقتراح هياكل متخصصة تسمح باستدلال أكثر فعالية نظرًا لأن نماذج اللغة الكبيرة يتم نشرها دائمًا بنفس الطريقة أثناء عملية الاستدلال، أي توليد النص التنبؤي التلقائي مع سياق الإدخال الطويل، فقد تم اقتراح بنيات نموذج متخصصة تسمح بالاستدلال الأكثر كفاءة. أهم تقدم في بنيات النماذج هنا هو [عذر](https://huggingface.co/papers/2108.12409)، [الترميز الدوار](https://huggingface.co/papers/2104.09864)، [الاهتمام متعدد الاستعلامات (MQA)](https://huggingface.co/papers/1911.02150) و [مجموعة الانتباه بالاستعلام (GQA)](https://huggingface.co/papers/2305.13245). | ||||
| 3.  **الابتكارات المعمارية:** حيث تم اقتراح هياكل متخصصة تسمح باستدلال أكثر فعالية نظرًا لأن نماذج اللغة الكبيرة يتم نشرها دائمًا بنفس الطريقة أثناء عملية الاستدلال، أي توليد النص التنبؤي التلقائي مع سياق الإدخال الطويل، فقد تم اقتراح بنيات نموذج متخصصة تسمح بالاستدلال الأكثر كفاءة. أهم تقدم في بنيات النماذج هنا هو [عذر](https://huggingface.co/papers/2108.12409)، [الترميز الدوار](https://huggingface.co/papers/2104.09864)، [الاهتمام متعدد الاستعلامات (MQA)](https://huggingface.co/papers/1911.02150) و [مجموعة الانتباه بالاستعلام (GQA)]((https://huggingface.co/papers/2305.13245)). | ||||
|  | ||||
| على مدار هذا الدليل، سنقدم تحليلًا للتوليد التنبؤي التلقائي من منظور المُوتِّرات. نتعمق في مزايا وعيوب استخدام دقة أقل، ونقدم استكشافًا شاملاً لخوارزميات الانتباه الأحدث، ونناقش بنيات نماذج نماذج اللغة الكبيرة المحسنة. سندعم الشرح بأمثلة عملية تُبرِز كل تحسين على حدة. | ||||
|  | ||||
| @ -73,7 +73,7 @@ model = AutoModelForCausalLM.from_pretrained("bigscience/bloom", device_map="aut | ||||
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | ||||
| import torch | ||||
|  | ||||
| model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", dtype=torch.bfloat16, device_map="auto", pad_token_id=0) | ||||
| model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", torch_dtype=torch.bfloat16, device_map="auto", pad_token_id=0) | ||||
| tokenizer = AutoTokenizer.from_pretrained("bigcode/octocoder") | ||||
|  | ||||
| pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) | ||||
| @ -114,7 +114,7 @@ bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) | ||||
|  | ||||
| > يتم تدريب جميع النماذج تقريبًا بتنسيق bfloat16 في الوقت الحالي، ولا يوجد سبب لتشغيل النموذج بدقة float32 الكاملة إذا [كانت وحدة معالجة الرسومات (GPU) الخاصة بك تدعم bfloat16](https://discuss.pytorch.org/t/bfloat16-native-support/117155/5). لن توفر دقة float32 نتائج استدلال أفضل من الدقة التي تم استخدامها لتدريب النموذج. | ||||
|  | ||||
| إذا لم تكن متأكدًا من تنسيق تخزين أوزان النموذج على Hub، فيمكنك دائمًا الاطلاع على تهيئة نقطة التفتيش في `"dtype"`، على سبيل المثال [هنا](https://huggingface.co/meta-llama/Llama-2-7b-hf/blob/6fdf2e60f86ff2481f2241aaee459f85b5b0bbb9/config.json#L21). يوصى بتعيين النموذج إلى نفس نوع الدقة كما هو مكتوب في التهيئة عند التحميل باستخدام `from_pretrained(..., dtype=...)` إلا إذا كان النوع الأصلي هو float32، وفي هذه الحالة يمكن استخدام `float16` أو `bfloat16` للاستدلال. | ||||
| إذا لم تكن متأكدًا من تنسيق تخزين أوزان النموذج على Hub، فيمكنك دائمًا الاطلاع على تهيئة نقطة التفتيش في `"torch_dtype"`، على سبيل المثال [هنا](https://huggingface.co/meta-llama/Llama-2-7b-hf/blob/6fdf2e60f86ff2481f2241aaee459f85b5b0bbb9/config.json#L21). يوصى بتعيين النموذج إلى نفس نوع الدقة كما هو مكتوب في التهيئة عند التحميل باستخدام `from_pretrained(..., torch_dtype=...)` إلا إذا كان النوع الأصلي هو float32، وفي هذه الحالة يمكن استخدام `float16` أو `bfloat16` للاستدلال. | ||||
|  | ||||
|  | ||||
| دعونا نحدد وظيفة `flush(...)` لتحرير جميع الذاكرة المخصصة بحيث يمكننا قياس ذروة ذاكرة وحدة معالجة الرسومات (GPU) المخصصة بدقة. | ||||
| @ -231,7 +231,7 @@ flush() | ||||
| دعنا نرى ما هو استهلاك ذاكرة GPU الذروة الذي يوفره تكميم 4 بت. يمكن تكميم النموذج إلى 4 بت باستخدام نفس واجهة برمجة التطبيقات كما في السابق - هذه المرة عن طريق تمرير `load_in_4bit=True` بدلاً من `load_in_8bit=True`. | ||||
|  | ||||
| ```python | ||||
| model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", quantization_config=BitsAndBytesConfig(load_in_4bit=True), pad_token_id=0) | ||||
| model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", load_in_4bit=True, pad_token_id=0) | ||||
|  | ||||
| pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) | ||||
|  | ||||
| @ -329,6 +329,174 @@ $$ \textbf{O}_i \leftarrow s^a_{ij} * \textbf{O}_i + s^b_{ij} * \mathbf{V}_{j} \ | ||||
| لنلقِ نظرة على مثال عملي. | ||||
|  | ||||
|  | ||||
| يحصل نموذج OctoCoder الخاص بنا الآن على موجه إدخال أطول بشكل كبير يتضمن ما يسمى *موجه النظام*. تُستخدم موجهات النظام لتوجيه LLM إلى مساعد أفضل مصمم لمهام المستخدمين. | ||||
| فيما يلي، نستخدم موجه النظام الذي سيجعل OctoCoder مساعد ترميز أفضل. | ||||
|  | ||||
| ```python | ||||
| system_prompt = """Below are a series of dialogues between various people and an AI technical assistant. | ||||
| The assistant tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble but knowledgeable. | ||||
| The assistant is happy to help with code questions and will do their best to understand exactly what is needed. | ||||
| It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. | ||||
| That said, the assistant is practical really does its best, and doesn't let caution get too much in the way of being useful. | ||||
|  | ||||
| The Starcoder models are a series of 15.5B parameter models trained on 80+ programming languages from The Stack (v1.2) (excluding opt-out requests). | ||||
| The model uses Multi Query Attention, was trained using the Fill-in-the-Middle objective, and with 8,192 tokens context window for a trillion tokens of heavily deduplicated data. | ||||
| ----- | ||||
|  | ||||
| Question: Write a function that takes two lists and returns a list that has alternating elements from each input list. | ||||
|  | ||||
| Answer: Sure. Here is a function that does that. | ||||
|  | ||||
| def alternating(list1, list2): | ||||
|    results = [] | ||||
|    for i in range(len(list1)): | ||||
|        results.append(list1[i]) | ||||
|        results.append(list2[i]) | ||||
|    return results | ||||
|  | ||||
| Question: Can you write some test cases for this function? | ||||
|  | ||||
| Answer: Sure, here are some tests. | ||||
|  | ||||
| assert alternating([10, 20, 30], [1, 2, 3]) == [10, 1, 20, 2, 30, 3] | ||||
| assert alternating([True, False], [4, 5]) == [True, 4, False, 5] | ||||
| assert alternating([], []) == [] | ||||
|  | ||||
| Question: Modify the function so that it returns all input elements when the lists have uneven length. The elements from the longer list should be at the end. | ||||
|  | ||||
| Answer: Here is the modified function. | ||||
|  | ||||
| def alternating(list1, list2): | ||||
|    results = [] | ||||
|    for i in range(min(len(list1), len(list2))): | ||||
|        results.append(list1[i]) | ||||
|        results.append(list2[i]) | ||||
|    if len(list1) > len(list2): | ||||
|        results.extend(list1[i+1:]) | ||||
|    else: | ||||
|        results.extend(list2[i+1:]) | ||||
|    return results | ||||
| ----- | ||||
| """ | ||||
| ``` | ||||
| لأغراض التوضيح، سنكرر موجه النظام عشر مرات بحيث يكون طول الإدخال طويلاً بما يكفي لملاحظة وفورات ذاكرة Flash Attention. | ||||
| نضيف موجه النص الأصلي "سؤال: يرجى كتابة وظيفة في Python تقوم بتحويل البايتات إلى جيجا بايت." | ||||
|  | ||||
| ```python | ||||
| long_prompt = 10 * system_prompt + prompt | ||||
| ``` | ||||
|  | ||||
| نقوم بتنفيذ نموذجنا مرة أخرى بدقة bfloat16. | ||||
|  | ||||
| ```python | ||||
| model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", torch_dtype=torch.bfloat16, device_map="auto") | ||||
| tokenizer = AutoTokenizer.from_pretrained("bigcode/octocoder") | ||||
|  | ||||
| pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) | ||||
| ``` | ||||
|  | ||||
| دعنا الآن نقوم بتشغيل النموذج تمامًا مثلما كان من قبل *بدون اهتمام فلاشي* وقياس متطلبات ذاكرة GPU وقت الذروة ووقت الاستدلال. | ||||
|  | ||||
| ```python | ||||
| import time | ||||
|  | ||||
| start_time = time.time() | ||||
| result = pipe(long_prompt, max_new_tokens=60)[0]["generated_text"][len(long_prompt):] | ||||
|  | ||||
| print(f"Generated in {time.time() - start_time} seconds.") | ||||
| result | ||||
| ``` | ||||
|  | ||||
| **الإخراج**: | ||||
| ``` | ||||
| Generated in 10.96854019165039 seconds. | ||||
| Sure. Here is a function that does that. | ||||
|  | ||||
| def bytes_to_giga(bytes): | ||||
|    return bytes / 1024 / 1024 / 1024 | ||||
|  | ||||
| Answer: Sure. Here is a function that does that. | ||||
|  | ||||
| def | ||||
| ``` | ||||
|  | ||||
| نحصل على نفس الإخراج كما كان من قبل، ولكن هذه المرة يكرر النموذج الإجابة عدة مرات حتى يتم قطعها عند 60 رمزًا. وهذا ليس مستغربًا، إذ كررنا موجه النظام عشر مرات لأغراض التوضيح، وبالتالي دفعنا النموذج إلى تكرار نفسه. | ||||
|  | ||||
| **ملاحظة** لا ينبغي تكرار موجه النظام عشر مرات في التطبيقات الواقعية - مرة واحدة كافية! | ||||
|  | ||||
| دعنا نقيس متطلبات ذاكرة GPU وقت الذروة. | ||||
|  | ||||
| ```python | ||||
| bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) | ||||
| ``` | ||||
|  | ||||
| **الإخراج**: | ||||
| ``` | ||||
| 37.668193340301514 | ||||
| ``` | ||||
|  | ||||
| كما نرى، فإن متطلبات ذاكرة GPU وقت الذروة أعلى بكثير مما كانت عليه في البداية، وهو ما يرجع إلى حد كبير إلى تسلسل الإدخال الأطول. أيضًا، يستغرق التوليد الآن قرابة 11 ثانية. | ||||
|  | ||||
| نستدعي `flush()` لتحرير ذاكرة GPU لتجربتنا التالية. | ||||
|  | ||||
| ```python | ||||
| flush() | ||||
| ``` | ||||
|  | ||||
| للمقارنة، دعونا نقوم بتشغيل نفس الدالة، ولكن مع تمكين الاهتمام فلاش بدلاً من ذلك. | ||||
| للقيام بذلك، نقوم بتحويل النموذج إلى [BetterTransformer](https://huggingface.co/docs/optimum/bettertransformer/overview)، ومن خلال ذلك نُمكِّن [SDPA self-attention](https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention) الخاص بـ PyTorch، والذي بدوره قادر على استخدام الاهتمام فلاش. | ||||
|  | ||||
| ```python | ||||
| model.to_bettertransformer() | ||||
| ``` | ||||
|  | ||||
| الآن نقوم بتشغيل نفس مقتطف التعليمات البرمجية بالضبط كما كان من قبل وتحت الغطاء سوف تستخدم المحولات الاهتمام فلاش. | ||||
|  | ||||
| ```py | ||||
| start_time = time.time() | ||||
| with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False): | ||||
|     result = pipe(long_prompt, max_new_tokens=60)[0]["generated_text"][len(long_prompt):] | ||||
|  | ||||
| print(f"Generated in {time.time() - start_time} seconds.") | ||||
| result | ||||
| ``` | ||||
|  | ||||
| **الإخراج**: | ||||
| ``` | ||||
| Generated in 3.0211617946624756 seconds. | ||||
| Sure. Here is a function that does that. | ||||
|  | ||||
| def bytes_to_giga(bytes): | ||||
|    return bytes / 1024 / 1024 / 1024 | ||||
|  | ||||
| Answer: Sure. Here is a function that does that. | ||||
|  | ||||
| def | ||||
| ``` | ||||
|  | ||||
| نحصل على نفس النتيجة بالضبط كما كان من قبل، ولكن يمكننا ملاحظة تسريع كبير بفضل الاهتمام فلاش. | ||||
|  | ||||
| دعنا نقيس استهلاك الذاكرة لآخر مرة. | ||||
|  | ||||
| ```python | ||||
| bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) | ||||
| ``` | ||||
|  | ||||
| **الإخراج**: | ||||
| ``` | ||||
| 32.617331981658936 | ||||
| ``` | ||||
|  | ||||
| وبذلك نعود تقريبًا إلى ذروة ذاكرة GPU الأصلية لدينا البالغة 29 جيجابايت. | ||||
|  | ||||
| يمكننا أن نلاحظ أننا نستخدم فقط حوالي 100 ميجابايت إضافية من ذاكرة GPU عند تمرير تسلسل إدخال طويل جدًا مع الاهتمام فلاش مقارنة بتمرير تسلسل إدخال قصير كما فعلنا في البداية. | ||||
|  | ||||
| ```py | ||||
| flush() | ||||
| ``` | ||||
|  | ||||
| لمزيد من المعلومات حول كيفية استخدام Flash Attention، يرجى الاطلاع على [صفحة التوثيق هذه](https://huggingface.co/docs/transformers/en/perf_infer_gpu_one#flashattention-2). | ||||
|  | ||||
| ## 3. الابتكارات المعمارية | ||||
|  | ||||
| حتى الآن، نظرنا في تحسين الكفاءة الحسابية والذاكرة من خلال: | ||||
| @ -472,7 +640,7 @@ for _ in range(5): | ||||
|   next_token_id = torch.argmax(next_logits, dim=-1) | ||||
|  | ||||
|   print("shape of input_ids", next_token_id.shape) | ||||
|   print("length of key-value cache", past_key_values.get_seq_length())  # past_key_values are of shape [num_layers, 0 for k, 1 for v, batch_size, length, hidden_dim] | ||||
|   print("length of key-value cache", len(past_key_values[0][0]))  # past_key_values are of shape [num_layers, 0 for k, 1 for v, batch_size, length, hidden_dim] | ||||
|   generated_tokens.append(next_token_id.item()) | ||||
|  | ||||
| generated_text = tokenizer.batch_decode(generated_tokens) | ||||
|  | ||||
| @ -41,7 +41,7 @@ picture-in-picture" allowfullscreen></iframe> | ||||
| قبل مشاركة نموذج على Hub، ستحتاج إلى بيانات اعتماد حساب Hugging Face الخاصة بك.  إذا كنت تستخدم منصة الأوامر، فقم بتشغيل الأمر التالي في بيئة افتراضية حيث تم تثبيت 🤗 Transformers. سيقوم هذا الأمر بتخزين رمز الدخول الخاص بك في مجلد التخزين المؤقت لـ Hugging Face (`~/.cache/` بشكل افتراضي): | ||||
|  | ||||
| ```bash | ||||
| hf auth login | ||||
| huggingface-cli login | ||||
| ``` | ||||
|  | ||||
| إذا كنت تستخدم دفتر ملاحظات مثل Jupyter أو Colaboratory، فتأكد من تثبيت مكتبة [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library). تسمح لك هذه المكتبة بالتفاعل برمجيًا مع Hub. | ||||
| @ -65,15 +65,43 @@ pip install huggingface_hub | ||||
|  | ||||
| تحويل نقطة التحقق لإطار عمل آخر أمر سهل. تأكد من تثبيت PyTorch و TensorFlow (راجع [هنا](installation) لتعليمات التثبيت)، ثم ابحث عن النموذج الملائم لمهمتك في الإطار الآخر. | ||||
|  | ||||
| <frameworkcontent> | ||||
| <pt> | ||||
| حدد `from_tf=True` لتحويل نقطة تحقق من TensorFlow إلى PyTorch: | ||||
|  | ||||
| ```py | ||||
| >>> pt_model = DistilBertForSequenceClassification.from_pretrained("path/to/awesome-name-you-picked", from_tf=True) | ||||
| >>> pt_model.save_pretrained("path/to/awesome-name-you-picked") | ||||
| ``` | ||||
| </pt> | ||||
| <tf> | ||||
| حدد `from_pt=True` لتحويل نقطة تحقق من PyTorch إلى TensorFlow: | ||||
|  | ||||
| ```py | ||||
| >>> tf_model = TFDistilBertForSequenceClassification.from_pretrained("path/to/awesome-name-you-picked", from_pt=True) | ||||
| ``` | ||||
|  | ||||
| بعد ذلك، يمكنك حفظ نموذج TensorFlow الجديد بنقطة التحقق الجديدة: | ||||
|  | ||||
| ```py | ||||
| >>> tf_model.save_pretrained("path/to/awesome-name-you-picked") | ||||
| ``` | ||||
| </tf> | ||||
| <jax> | ||||
| إذا كان النموذج متاحًا في Flax، فيمكنك أيضًا تحويل نقطة تحقق من PyTorch إلى Flax: | ||||
|  | ||||
| ```py | ||||
| >>> flax_model = FlaxDistilBertForSequenceClassification.from_pretrained( | ||||
| ...     "path/to/awesome-name-you-picked", from_pt=True | ||||
| ... ) | ||||
| ``` | ||||
| </jax> | ||||
| </frameworkcontent> | ||||
|  | ||||
| ## دفع نموذج أثناء التدريب | ||||
|  | ||||
| <frameworkcontent> | ||||
| <pt> | ||||
| <Youtube id="Z1-XMy-GNLQ"/> | ||||
|  | ||||
| مشاركة نموذجك على Hub أمر بسيط للغاية؛ كل ما عليك هو إضافة معلمة أو استدعاء رد إضافي. كما تذكر من درس [التدريب الدقيق](training)، فإن فئة [`TrainingArguments`] هي المكان الذي تحدد فيه المعلمات الفائقة وخيارات التدريب الإضافية. يشمل أحد خيارات التدريب هذه القدرة على دفع النموذج مباشرة إلى منصة Hub. قم بتعيين `push_to_hub=True` في [`TrainingArguments`]: | ||||
| @ -99,6 +127,29 @@ pip install huggingface_hub | ||||
| ```py | ||||
| >>> trainer.push_to_hub() | ||||
| ``` | ||||
| </pt> | ||||
| <tf> | ||||
| شارك نموذجًا على Hub باستخدام [`PushToHubCallback`]. في دالة [`PushToHubCallback`]، أضف: | ||||
|  | ||||
| - دليل إخراج لنموذجك. | ||||
| - المُجزّئ اللغوي. | ||||
| - `hub_model_id`، والذي هو اسم مستخدم Hub واسم النموذج الخاص بك. | ||||
|  | ||||
| ```py | ||||
| >>> from transformers import PushToHubCallback | ||||
|  | ||||
| >>> push_to_hub_callback = PushToHubCallback( | ||||
| ...     output_dir="./your_model_save_path", tokenizer=tokenizer, hub_model_id="your-username/my-awesome-model" | ||||
| ... ) | ||||
| ``` | ||||
|  | ||||
| أضف الاستدعاء إلى [`fit`](https://keras.io/api/models/model_training_apis/)، وسيقوم 🤗 Transformers بدفع النموذج المدرب إلى Hub: | ||||
|  | ||||
| ```py | ||||
| >>> model.fit(tf_train_dataset, validation_data=tf_validation_dataset, epochs=3, callbacks=push_to_hub_callback) | ||||
| ``` | ||||
| </tf> | ||||
| </frameworkcontent> | ||||
|  | ||||
| ## استخدام دالة `push_to_hub` | ||||
|  | ||||
| @ -169,4 +220,4 @@ pip install huggingface_hub | ||||
| * قم بإنشاء ملف `README.md` وتحميله يدويًا. | ||||
| * انقر فوق الزر **Edit model card** في مستودع نموذجك. | ||||
|  | ||||
| ألقِ نظرة على بطاقة [DistilBert](https://huggingface.co/distilbert/distilbert-base-uncased) للحصول على مثال جيد على نوع المعلومات التي يجب أن تتضمنها بطاقة النموذج. للحصول على مزيد من التفاصيل حول الخيارات الأخرى التي يمكنك التحكم فيها في ملف `README.md` مثل البصمة الكربونية للنموذج أو أمثلة الأداة، راجع الوثائق [هنا](https://huggingface.co/docs/hub/models-cards). | ||||
| ألقِ نظرة على بطاقة [DistilBert](https://huggingface.co/distilbert/distilbert-base-uncased) للحصول على مثال جيد على نوع المعلومات التي يجب أن تتضمنها بطاقة النموذج. للحصول على مزيد من التفاصيل حول الخيارات الأخرى التي يمكنك التحكم فيها في ملف `README.md` مثل البصمة الكربونية للنموذج أو أمثلة الأداة، راجع الوثائق [هنا](https://huggingface.co/docs/hub/models-cards). | ||||
| @ -39,6 +39,7 @@ | ||||
| | [كيفية ضبط نموذج بدقة على التلخيص](https://github.com/huggingface/notebooks/blob/main/examples/summarization.ipynb)| يوضح كيفية معالجة البيانات مسبقًا وضبط نموذج مُدرَّب مسبقًا بدقة على XSUM. | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/summarization.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/summarization.ipynb)| | ||||
| | [كيفية تدريب نموذج لغة من البداية](https://github.com/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| تسليط الضوء على جميع الخطوات لتدريب نموذج Transformer بشكل فعال على بيانات مخصصة | [](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| | ||||
| | [كيفية إنشاء نص](https://github.com/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| كيفية استخدام أساليب فك التشفير المختلفة لإنشاء اللغة باستخدام المحولات | [](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| | ||||
| | [كيفية إنشاء نص (مع قيود)](https://github.com/huggingface/blog/blob/main/notebooks/53_constrained_beam_search.ipynb)| كيفية توجيه إنشاء اللغة باستخدام القيود التي يوفرها المستخدم | [](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/53_constrained_beam_search.ipynb)| [](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/53_constrained_beam_search.ipynb)| | ||||
| | [Reformer](https://github.com/huggingface/blog/blob/main/notebooks/03_reformer.ipynb)| كيف يدفع Reformer حدود النمذجة اللغوية | [](https://colab.research.google.com/github/patrickvonplaten/blog/blob/main/notebooks/03_reformer.ipynb)| [](https://studiolab.sagemaker.aws/import/github/patrickvonplaten/blog/blob/main/notebooks/03_reformer.ipynb)| | ||||
|  | ||||
| #### رؤية الكمبيوتر[[pytorch-cv]] | ||||
|  | ||||
| @ -90,7 +90,7 @@ out = transcriber(...)  # سيتم الرجوع إلى استخدام `my_parame | ||||
| transcriber = pipeline(model="openai/whisper-large-v2", device=0) | ||||
| ``` | ||||
|  | ||||
| إذا كان النموذج كبيرًا جدًا بالنسبة لوحدة معالجة الرسومات (GPU) واحدة، وأنت تستخدم PyTorch، فيمكنك تعيين `dtype='float16'` لتمكين الاستدلال بدقة FP16. عادةً ما لا يتسبب ذلك في حدوث انخفاضات كبيرة في الأداء، ولكن تأكد من تقييمه على نماذجك! | ||||
| إذا كان النموذج كبيرًا جدًا بالنسبة لوحدة معالجة الرسومات (GPU) واحدة، وأنت تستخدم PyTorch، فيمكنك تعيين `torch_dtype='float16'` لتمكين الاستدلال بدقة FP16. عادةً ما لا يتسبب ذلك في حدوث انخفاضات كبيرة في الأداء، ولكن تأكد من تقييمه على نماذجك! | ||||
|  | ||||
| بدلاً من ذلك، يمكنك تعيين `device_map="auto"` لتحديد كيفية تحميل أوزان النموذج وتخزينها تلقائيًا. يتطلب استخدام معامل `device_map` مكتبة 🤗 [Accelerate](https://huggingface.co/docs/accelerate): | ||||
|  | ||||
| @ -273,7 +273,7 @@ pip install pytesseract | ||||
| import torch | ||||
| from transformers import pipeline | ||||
|  | ||||
| pipe = pipeline(model="facebook/opt-1.3b", dtype=torch.bfloat16, device_map="auto") | ||||
| pipe = pipeline(model="facebook/opt-1.3b", torch_dtype=torch.bfloat16, device_map="auto") | ||||
| output = pipe("This is a cool example!", do_sample=True, top_p=0.95) | ||||
| ``` | ||||
|  | ||||
|  | ||||
| @ -152,6 +152,8 @@ pip install datasets | ||||
|  | ||||
| قم بتعيين معلمة `return_tensors` إلى إما `pt` لـ PyTorch، أو `tf` لـ TensorFlow: | ||||
|  | ||||
| <frameworkcontent> | ||||
| <pt> | ||||
|  | ||||
| ```py | ||||
| >>> batch_sentences = [ | ||||
| @ -171,6 +173,33 @@ pip install datasets | ||||
|                            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], | ||||
|                            [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])} | ||||
| ``` | ||||
| </pt> | ||||
| <tf> | ||||
|   | ||||
| ```py | ||||
| >>> batch_sentences = [ | ||||
| ...     "But what about second breakfast?", | ||||
| ...     "Don't think he knows about second breakfast, Pip.", | ||||
| ...     "What about elevensies?", | ||||
| ... ] | ||||
| >>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="tf") | ||||
| >>> print(encoded_input) | ||||
| {'input_ids': <tf.Tensor: shape=(3, 15), dtype=int32, numpy= | ||||
| array([[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0], | ||||
|        [101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102], | ||||
|        [101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]], | ||||
|       dtype=int32)>, | ||||
|  'token_type_ids': <tf.Tensor: shape=(3, 15), dtype=int32, numpy= | ||||
| array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], | ||||
|        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], | ||||
|        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>, | ||||
|  'attention_mask': <tf.Tensor: shape=(3, 15), dtype=int32, numpy= | ||||
| array([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], | ||||
|        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], | ||||
|        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>} | ||||
| ``` | ||||
| </tf> | ||||
| </frameworkcontent> | ||||
|  | ||||
| <Tip> | ||||
|  | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	