mirror of
				https://github.com/huggingface/kernels.git
				synced 2025-10-31 19:54:28 +08:00 
			
		
		
		
	Compare commits
	
		
			19 Commits
		
	
	
		
			fix_metada
			...
			upload-hub
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 81fb5d34bb | |||
| fd237b04bd | |||
| 0eb07f198c | |||
| 620bf75864 | |||
| 8f78116b87 | |||
| f1782d1914 | |||
| f6c901205c | |||
| 6899e4bfe1 | |||
| ad9cba28f7 | |||
| 2f1986e01a | |||
| ab607022c0 | |||
| 02cbff1d0f | |||
| d2d8f77d97 | |||
| 421f09e08a | |||
| e2d43815c1 | |||
| 7ee9660d2c | |||
| b56106966e | |||
| 1720baac7d | |||
| a6dc55ddb1 | 
							
								
								
									
										15
									
								
								.github/workflows/test.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								.github/workflows/test.yml
									
									
									
									
										vendored
									
									
								
							| @ -24,7 +24,7 @@ jobs: | ||||
|       max-parallel: 4 | ||||
|       matrix: | ||||
|         python-version: ["3.10", "3.12"] | ||||
|         torch-version: ["2.7.0", "2.8.0"] | ||||
|         torch-version: ["2.6.0", "2.7.0"] | ||||
|  | ||||
|     env: | ||||
|       UV_PYTHON_PREFERENCE: only-managed | ||||
| @ -52,15 +52,9 @@ jobs: | ||||
|  | ||||
|       - name: Run tests | ||||
|         run: | | ||||
|           export HF_TOKEN=${{ secrets.HF_TOKEN }} | ||||
|           uv run pytest tests | ||||
|  | ||||
|       - name: Run staging tests | ||||
|         env: | ||||
|           HF_TOKEN: ${{ secrets.HF_STAGING_TOKEN }} | ||||
|         run: | | ||||
|           HUGGINGFACE_CO_STAGING=true uv run pytest --token -m "is_staging_test" tests/ | ||||
|         if: matrix.python_version == '3.10' && matrix.torch-version == '2.7.0' | ||||
|  | ||||
|       - name: Check kernel conversion | ||||
|         run: | | ||||
|           uv pip install wheel | ||||
| @ -73,11 +67,6 @@ jobs: | ||||
|         run: | | ||||
|           uv run kernels generate-readme kernels-community/triton-layer-norm | ||||
|  | ||||
|       - name: Check kernel check | ||||
|         run: | | ||||
|           uv pip install kernel-abi-check | ||||
|           kernels check kernels-community/activation | ||||
|  | ||||
|       - name: Import check without torch | ||||
|         run: | | ||||
|           uv pip uninstall torch | ||||
|  | ||||
							
								
								
									
										8
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								Makefile
									
									
									
									
									
								
							| @ -1,8 +0,0 @@ | ||||
| .PHONY: style | ||||
|  | ||||
| export check_dirs := src examples tests | ||||
|  | ||||
| style: | ||||
| 	black ${check_dirs} | ||||
| 	isort ${check_dirs} | ||||
| 	ruff check ${check_dirs} --fix | ||||
| @ -51,7 +51,7 @@ activation.gelu_fast(y, x) | ||||
| print(y) | ||||
| ``` | ||||
|  | ||||
| You can [search for kernels](https://huggingface.co/models?other=kernels) on | ||||
| You can [search for kernels](https://huggingface.co/models?other=kernel) on | ||||
| the Hub. | ||||
|  | ||||
| ## 📚 Documentation | ||||
| @ -62,6 +62,7 @@ the Hub. | ||||
| - [Using layers](docs/source/layers.md) | ||||
| - [Locking kernel/layer versions](docs/source/locking.md) | ||||
| - [Environment variables](docs/source/env.md) | ||||
| - [Using kernels in a Docker container](docs/source/docker.md) | ||||
| - [Kernel requirements](docs/source/kernel-requirements.md) | ||||
| - [Frequently Asked Questions](docs/source/faq.md) | ||||
| - [Writing kernels](https://github.com/huggingface/kernel-builder/blob/main/docs/writing-kernels.md) using [kernel-builder](https://github.com/huggingface/kernel-builder/) | ||||
|  | ||||
| @ -21,8 +21,6 @@ | ||||
|       title: Kernels | ||||
|     - local: api/layers | ||||
|       title: Layers | ||||
|     - local: cli | ||||
|       title: Kernels CLI | ||||
|   title: API Reference | ||||
| - sections: | ||||
|     - local: kernel-requirements | ||||
|  | ||||
| @ -6,10 +6,6 @@ | ||||
|  | ||||
| [[autodoc]] kernels.get_kernel | ||||
|  | ||||
| ### get_local_kernel | ||||
|  | ||||
| [[autodoc]] kernels.get_local_kernel | ||||
|  | ||||
| ### has_kernel | ||||
|  | ||||
| [[autodoc]] kernels.has_kernel | ||||
|  | ||||
| @ -39,11 +39,3 @@ | ||||
| ### LayerRepository | ||||
|  | ||||
| [[autodoc]] kernels.LayerRepository | ||||
|  | ||||
| ### LocalLayerRepository | ||||
|  | ||||
| [[autodoc]] kernels.LocalLayerRepository | ||||
|  | ||||
| ### LockedLayerRepository | ||||
|  | ||||
| [[autodoc]] kernels.LockedLayerRepository | ||||
|  | ||||
| @ -21,22 +21,6 @@ activation.gelu_fast(y, x) | ||||
| print(y) | ||||
| ``` | ||||
|  | ||||
| ### Using version bounds | ||||
|  | ||||
| Kernels are versioned using tags of the form `v<major>.<minor>.<patch>`. | ||||
| You can specify which version to download using Python version specifiers: | ||||
|  | ||||
| ```python | ||||
| import torch | ||||
| from kernels import get_kernel | ||||
|  | ||||
| activation = get_kernel("kernels-community/activation", version=">=0.0.4,<0.1.0") | ||||
| ``` | ||||
|  | ||||
| This will get the latest kernel tagged `v0.0.z` where `z` is at least 4. It | ||||
| is strongly recommended to specify a version bound, since a kernel author | ||||
| might push incompatible changes to the `main` branch. | ||||
|  | ||||
| ## Checking Kernel Availability | ||||
|  | ||||
| You can check if a specific kernel is available for your environment: | ||||
|  | ||||
| @ -1,58 +0,0 @@ | ||||
| # Kernels CLI Reference | ||||
|  | ||||
| ## Main Functions | ||||
|  | ||||
| ### kernels check | ||||
|  | ||||
| You can use `kernels check` to test compliance of a kernel on the Hub. | ||||
| This currently checks that the kernel: | ||||
|  | ||||
| - Supports the currently-required Python ABI version. | ||||
| - Works on supported operating system versions. | ||||
|  | ||||
| For example: | ||||
|  | ||||
| ```bash | ||||
| $ kernels check kernels-community/flash-attn3 | ||||
| Checking variant: torch28-cxx11-cu128-aarch64-linux | ||||
|   🐍 Python ABI 3.9 compatible | ||||
|   🐧 manylinux_2_28 compatible | ||||
| [...] | ||||
| ``` | ||||
|  | ||||
| ### kernels to-wheel | ||||
|  | ||||
| We strongly recommend downloading kernels from the Hub using the `kernels` | ||||
| package, since this comes with large [benefits](index.md) over using Python | ||||
| wheels. That said, some projects may require deployment of kernels as | ||||
| wheels. The `kernels` utility provides a simple solution to this. You can | ||||
| convert any Hub kernel into a set of wheels with the `to-wheel` command: | ||||
|  | ||||
| ```bash | ||||
| $ kernels to-wheel drbh/img2grey 1.1.2 | ||||
| ☸ img2grey-1.1.2+torch27cu128cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu124cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu126cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch27cu126cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu126cxx98-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch27cu128cxx11-cp39-abi3-manylinux_2_28_aarch64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu126cxx98-cp39-abi3-manylinux_2_28_aarch64.whl | ||||
| ☸ img2grey-1.1.2+torch27cu126cxx11-cp39-abi3-manylinux_2_28_aarch64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu126cxx11-cp39-abi3-manylinux_2_28_aarch64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu118cxx98-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu124cxx98-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu118cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch27cu118cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ``` | ||||
|  | ||||
| ### kernels upload | ||||
|  | ||||
| Use `kernels upload <dir_containing_build> --repo_id="hub-username/kernel"` to upload | ||||
| your kernel builds to the Hub. To know the supported arguments run: `kernels upload -h`. | ||||
|  | ||||
| **Notes**: | ||||
|  | ||||
| - This will take care of creating a repository on the Hub with the `repo_id` provided. | ||||
| - If a repo with the `repo_id` already exists and if it contains a `build` with the build variant | ||||
|   being uploaded, it will attempt to delete the files existing under it. | ||||
| - Make sure to be authenticated (run `hf auth login` if not) to be able to perform uploads to the Hub. | ||||
| @ -1,8 +1,6 @@ | ||||
| # FAQ | ||||
|  | ||||
| ## Kernel layers | ||||
|  | ||||
| ### Why is the kernelization step needed as a separate step? | ||||
| ## Why is the kernelization step needed? | ||||
|  | ||||
| In earlier versions of `kernels`, a layer's `forward` method was replaced | ||||
| by `use_kernel_forward_from_hub` and `replace_kernel_forward_from_hub`. | ||||
| @ -13,39 +11,3 @@ on data-dependent branching. | ||||
|  | ||||
| To avoid branching, we have to make dispatch decisions ahead of time, | ||||
| which is what the `kernelize` function does. | ||||
|  | ||||
| ### Why does kernelization only replace `forward` methods? | ||||
|  | ||||
| There are some other possible approaches. The first is to completely | ||||
| replace existing layers by kernel layers. However, since this would | ||||
| permit free-form layer classes, it would be much harder to validate | ||||
| that layers are fully compatible with the layers that they are | ||||
| replacing. For instance, they could have completely different member | ||||
| variables. Besides that, we would also need to hold on to the original | ||||
| layers, in case we need to revert to the base layers when the model | ||||
| is `kernelize`d again with different options. | ||||
|  | ||||
| A second approach would be to make an auxiliary layer that wraps the | ||||
| original layer and the kernel layer and dispatches to the kernel layer. | ||||
| This wouldn't have the issues of the first approach, because kernel layers | ||||
| could be similarly strict as they are now, and we would still have access | ||||
| to the original layers when `kernelize`-ing the model again. However, | ||||
| this would change the graph structure of the model and would break use | ||||
| cases where programs access the model internals (e.g. | ||||
| `model.layers[0].attention.query_weight`) or rely on the graph structure | ||||
| in other ways. | ||||
|  | ||||
| The approach of `forward`-replacement is the least invasive, because | ||||
| it preserves the original model graph. It is also reversible, since | ||||
| even though the `forward` of a layer _instance_ might be replaced, | ||||
| the corresponding class still has the original `forward`. | ||||
|  | ||||
| ## Misc | ||||
|  | ||||
| ### How can I disable kernel reporting in the user-agent? | ||||
|  | ||||
| By default, we collect telemetry when a call to `get_kernel()` is made. | ||||
| This only includes the `kernels` version, `torch` version, and the build | ||||
| information for the kernel being requested. | ||||
|  | ||||
| You can disable this by setting `export DISABLE_TELEMETRY=yes`. | ||||
|  | ||||
| @ -16,5 +16,5 @@ packages in that they are made to be: | ||||
|   the different PyTorch build configurations (various CUDA versions | ||||
|   and C++ ABIs). Furthermore, older C library versions must be supported. | ||||
|  | ||||
| You can [search for kernels](https://huggingface.co/models?other=kernels) on | ||||
| You can [search for kernels](https://huggingface.co/models?other=kernel) on | ||||
| the Hub. | ||||
|  | ||||
| @ -34,8 +34,6 @@ Kernels are versioned on the Hub using Git tags. Version tags must be of | ||||
| the form `v<major>.<minor>.<patch>`. Versions are used by [locking](./locking.md) | ||||
| to resolve the version constraints. | ||||
|  | ||||
| We recommend using [semver](https://semver.org/) to version kernels. | ||||
|  | ||||
| ## Native Python module | ||||
|  | ||||
| Kernels will typically contain a native Python module with precompiled | ||||
| @ -46,28 +44,19 @@ have dynamic library dependencies outside: | ||||
| - Torch; | ||||
| - CUDA/ROCm libraries installed as dependencies of Torch. | ||||
|  | ||||
| ## Compatibility with torch.compile | ||||
|  | ||||
| The Kernel Hub also encourages writing kernels in a `torch.compile` | ||||
| compliant way. This helps to ensure that the kernels are compatible with | ||||
| `torch.compile` without introducing graph breaks or triggering | ||||
| recompilation, both of which can limit the benefits of compilation. | ||||
|  | ||||
| [Here](https://github.com/huggingface/kernel-builder/blob/d1ee9bf9301ac8c5199099d90ee1c9d5c789d5ba/examples/relu-backprop-compile/tests/test_relu.py#L162) is a simple test example which checks for graph breaks and  | ||||
| recompilation triggers during `torch.compile`. | ||||
|  | ||||
| ### Linux | ||||
|  | ||||
| - Use [ABI3/Limited API](https://docs.python.org/3/c-api/stable.html#stable-application-binary-interface) | ||||
|   for compatibility with Python 3.9 and later. | ||||
| - Compatible with [`manylinux_2_28`](https://github.com/pypa/manylinux?tab=readme-ov-file#manylinux_2_28-almalinux-8-based). | ||||
|   This means that the extension **must not** use symbols versions higher than: | ||||
|  | ||||
|   - GLIBC 2.28 | ||||
|   - GLIBCXX 3.4.24 | ||||
|   - CXXABI 1.3.11 | ||||
|   - GCC 7.0.0 | ||||
|  | ||||
| These requirements can be checked with the ABI checker (see below). | ||||
| These requirement can be checked with the ABI checker (see below). | ||||
|  | ||||
| ### macOS | ||||
|  | ||||
|  | ||||
| @ -5,7 +5,7 @@ the Hub can replace the `forward` method of an existing layer for a certain | ||||
| device type. This makes it possible to provide more performant kernels for | ||||
| existing layers. | ||||
|  | ||||
| See [Kernel requirements](kernel-requirements.md) for more information on the | ||||
| See [Kernel requirements](kernel-requirements.md) for more information the | ||||
| requirements of Hub layers. | ||||
|  | ||||
| ## Making a layer extensible with kernels from the hub | ||||
| @ -111,7 +111,7 @@ model = kernelize(model, mode=Mode.INFERENCE | Mode.TORCH_COMPILE, use_fallback= | ||||
|  | ||||
| This can be useful if you want to guarantee that Hub kernels are used. | ||||
|  | ||||
| ### Inspecting which kernels are used | ||||
| ### Inspecting kernels which kernels are used | ||||
|  | ||||
| The kernels that are used are logged at the `INFO` level by `kernelize`. | ||||
| See the [Python logging](https://docs.python.org/3/library/logging.html) | ||||
| @ -157,33 +157,6 @@ with use_kernel_mapping(kernel_layer_mapping): | ||||
| This ensures that the mapping is not active anymore outside the | ||||
| `with`-scope. | ||||
|  | ||||
| ### Using version bounds | ||||
|  | ||||
| Kernels are versioned using tags of the form `v<major>.<minor>.<patch>`. | ||||
| You can specify which version of the kernel to download using Python version | ||||
| specifiers: | ||||
|  | ||||
| ```python | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         "cuda": LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|             version=">=0.0.4,<0.1.0", | ||||
|         ), | ||||
|         "rocm": LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|             version=">=0.0.4,<0.1.0", | ||||
|         ) | ||||
|     } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| This will get the layer from the latest kernel tagged `v0.0.z` where `z` is at | ||||
| least 4. It is strongly recommended to specify a version bound, since a | ||||
| kernel author might push incompatible changes to the `main` branch. | ||||
|  | ||||
| ### Registering kernels for specific modes | ||||
|  | ||||
| You might want to register two different kernels for a particular layer, | ||||
|  | ||||
| @ -20,11 +20,11 @@ activation.gelu_fast(y, x) | ||||
| print("Kernel successfully executed") | ||||
|  | ||||
| # Check results | ||||
| expected = torch.tensor( | ||||
|     [[0.8408, 1.9551, 2.9961], [4.0000, 5.0000, 6.0000], [7.0000, 8.0000, 9.0000]], | ||||
|     device="cuda:0", | ||||
|     dtype=torch.float16, | ||||
| ) | ||||
| expected = torch.tensor([ | ||||
|     [0.8408, 1.9551, 2.9961], | ||||
|     [4.0000, 5.0000, 6.0000], | ||||
|     [7.0000, 8.0000, 9.0000] | ||||
| ], device='cuda:0', dtype=torch.float16) | ||||
| assert torch.allclose(y, expected) | ||||
|  | ||||
| print("Calculated values are exact") | ||||
|  | ||||
| @ -24,7 +24,6 @@ | ||||
|       in | ||||
|       { | ||||
|         formatter = pkgs.nixfmt-tree; | ||||
|         packages.kernel-abi-check = pkgs.python3.pkgs.callPackage ./nix/kernel-abi-check.nix {}; | ||||
|         devShells = with pkgs; rec { | ||||
|           default = mkShell { | ||||
|             nativeBuildInputs = [ | ||||
| @ -41,7 +40,6 @@ | ||||
|               ++ (with python3.pkgs; [ | ||||
|                 docutils | ||||
|                 huggingface-hub | ||||
|                 (callPackage ./nix/kernel-abi-check.nix {}) | ||||
|                 mktestdocs | ||||
|                 pytest | ||||
|                 pytest-benchmark | ||||
|  | ||||
| @ -1,27 +0,0 @@ | ||||
| { | ||||
|   buildPythonPackage, | ||||
|   fetchPypi, | ||||
|   rustPlatform, | ||||
| }: | ||||
|  | ||||
| buildPythonPackage rec { | ||||
|   pname = "kernel-abi-check"; | ||||
|   version = "0.6.2"; | ||||
|  | ||||
|   src = fetchPypi { | ||||
|     inherit version; | ||||
|     pname = "kernel_abi_check"; | ||||
|     hash = "sha256-goWC7SK79FVNEvkp3bISBwbOqdSrmobANtrWIve9/Ys="; | ||||
|   }; | ||||
|  | ||||
|   cargoDeps = rustPlatform.fetchCargoVendor { | ||||
|     inherit pname version src sourceRoot; | ||||
|     hash = "sha256-+1jdbKsDKmG+bf0NEVYMv8t7Meuge1z2cgYfbdB9q8A="; | ||||
|   }; | ||||
|  | ||||
|   sourceRoot = "kernel_abi_check-${version}/bindings/python"; | ||||
|  | ||||
|   pyproject = true; | ||||
|  | ||||
|   nativeBuildInputs = with rustPlatform; [ cargoSetupHook maturinBuildHook ]; | ||||
| } | ||||
| @ -1,6 +1,6 @@ | ||||
| [project] | ||||
| name = "kernels" | ||||
| version = "0.10.4.dev0" | ||||
| version = "0.10.0.dev0" | ||||
| description = "Download compute kernels" | ||||
| authors = [ | ||||
|   { name = "OlivierDehaene", email = "olivier@huggingface.co" }, | ||||
| @ -12,7 +12,7 @@ license = { text = "Apache-2.0" } | ||||
| readme = "README.md" | ||||
| requires-python = ">= 3.9" | ||||
| dependencies = [ | ||||
|   "huggingface_hub>=0.26.0,<2.0", | ||||
|   "huggingface_hub>=0.26.0,<1.0", | ||||
|   "packaging>=20.0", | ||||
|   "pyyaml>=6", | ||||
|   "tomli>=2.0; python_version<'3.11'", | ||||
| @ -34,7 +34,6 @@ dev = [ | ||||
| ] | ||||
|  | ||||
| [project.optional-dependencies] | ||||
| abi-check = ["kernel-abi-check>=0.6.2,<0.7.0"] | ||||
| torch = ["torch"] | ||||
| docs = [ | ||||
|   "hf-doc-builder", | ||||
| @ -46,9 +45,6 @@ kernels = "kernels.cli:main" | ||||
| [project.entry-points."egg_info.writers"] | ||||
| "kernels.lock" = "kernels.lockfile:write_egg_lockfile" | ||||
|  | ||||
| [tool.isort] | ||||
| profile = "black" | ||||
| line_length = 119 | ||||
|  | ||||
| [tool.ruff] | ||||
| exclude = [ | ||||
| @ -75,4 +71,4 @@ line-length = 119 | ||||
| # Ignored rules: | ||||
| # "E501" -> line length violation | ||||
| lint.ignore = ["E501"] | ||||
| lint.select = ["E", "F", "W"] | ||||
| lint.select = ["E", "F", "I", "W"] | ||||
|  | ||||
| @ -3,7 +3,3 @@ markers = | ||||
|     cuda_only: marks tests that should only run on hosts with CUDA GPUs | ||||
|     rocm_only: marks tests that should only run on hosts with ROCm GPUs | ||||
|     darwin_only: marks tests that should only run on macOS | ||||
|     xpu_only: marks tests that should only run on hosts with Intel XPUs | ||||
|     npu_only: marks tests that should only run on Ascend NPUs | ||||
|     token: enable tests that require a write token | ||||
|     is_staging_test: Marks tests that should only run on a staging environment | ||||
|  | ||||
| @ -1,7 +1,3 @@ | ||||
| import importlib.metadata | ||||
|  | ||||
| __version__ = importlib.metadata.version("kernels") | ||||
|  | ||||
| from kernels.layer import ( | ||||
|     CUDAProperties, | ||||
|     Device, | ||||
| @ -25,7 +21,6 @@ from kernels.utils import ( | ||||
| ) | ||||
|  | ||||
| __all__ = [ | ||||
|     "__version__", | ||||
|     "CUDAProperties", | ||||
|     "Device", | ||||
|     "LayerRepository", | ||||
|  | ||||
| @ -1,142 +0,0 @@ | ||||
| import sys | ||||
| from pathlib import Path | ||||
|  | ||||
| from huggingface_hub import snapshot_download | ||||
| from kernel_abi_check import ( | ||||
|     BinaryFormat, | ||||
|     IncompatibleAbi3Symbol, | ||||
|     IncompatibleMacOSVersion, | ||||
|     IncompatibleManylinuxSymbol, | ||||
|     MissingMacOSVersion, | ||||
|     NonAbi3Symbol, | ||||
|     ObjectFile, | ||||
| ) | ||||
|  | ||||
| from kernels.utils import CACHE_DIR | ||||
|  | ||||
|  | ||||
| def check_kernel( | ||||
|     *, macos: str, manylinux: str, python_abi: str, repo_id: str, revision: str | ||||
| ): | ||||
|     variants_path = ( | ||||
|         Path( | ||||
|             snapshot_download( | ||||
|                 repo_id, | ||||
|                 allow_patterns=["build/*"], | ||||
|                 cache_dir=CACHE_DIR, | ||||
|                 revision=revision, | ||||
|             ) | ||||
|         ) | ||||
|         / "build" | ||||
|     ) | ||||
|  | ||||
|     has_issues = False | ||||
|     for variant_path in variants_path.iterdir(): | ||||
|         if not variant_path.is_dir(): | ||||
|             print( | ||||
|                 f"⛔ `build/` must only contain directories, found: {variant_path.name}", | ||||
|                 file=sys.stderr, | ||||
|             ) | ||||
|             has_issues = True | ||||
|             continue | ||||
|  | ||||
|         print(f"Checking variant: {variant_path.name}", file=sys.stderr) | ||||
|  | ||||
|         indent = 2 | ||||
|  | ||||
|         for dylib_path in variant_path.rglob("*.so"): | ||||
|             print_with_indent( | ||||
|                 indent, | ||||
|                 f"Dynamic library {dylib_path.relative_to(variant_path)}:", | ||||
|             ) | ||||
|  | ||||
|             o = ObjectFile(dylib_path) | ||||
|             has_issues |= check_abi3(o, python_abi, indent + 2) | ||||
|  | ||||
|             # TODO: also check operating system | ||||
|             if o.format() == BinaryFormat.ELF: | ||||
|                 has_issues |= check_manylinux(o, manylinux, indent + 2) | ||||
|             elif o.format() == BinaryFormat.MACH_O: | ||||
|                 has_issues |= check_macos(o, macos, indent + 2) | ||||
|  | ||||
|     if has_issues: | ||||
|         sys.exit(1) | ||||
|  | ||||
|  | ||||
| def check_abi3(object_file: ObjectFile, python_abi: str, indent: int) -> bool: | ||||
|     has_issues = False | ||||
|     violations = object_file.check_python_abi(python_abi) | ||||
|     if violations != []: | ||||
|         has_issues = True | ||||
|         print_with_indent( | ||||
|             indent, | ||||
|             f"⛔ Found symbols that are incompatible with Python ABI {python_abi}:", | ||||
|         ) | ||||
|         for violation in violations: | ||||
|             if isinstance(violation, IncompatibleAbi3Symbol): | ||||
|                 print_with_indent( | ||||
|                     indent + 3, | ||||
|                     f"{violation.name}: {violation.version_added}", | ||||
|                 ) | ||||
|             elif isinstance(violation, NonAbi3Symbol): | ||||
|                 print_with_indent( | ||||
|                     indent + 3, | ||||
|                     f"{violation.name}", | ||||
|                 ) | ||||
|     else: | ||||
|         print_with_indent(indent, f"🐍 Python ABI {python_abi} compatible") | ||||
|  | ||||
|     return has_issues | ||||
|  | ||||
|  | ||||
| def check_macos(object_file: ObjectFile, macos: str, indent: int) -> bool: | ||||
|     has_issues = False | ||||
|     violations = object_file.check_macos(macos) | ||||
|     if violations != []: | ||||
|         has_issues = True | ||||
|         print_with_indent( | ||||
|             indent, | ||||
|             f"⛔ Found incompatibility with macOS {macos}:", | ||||
|         ) | ||||
|  | ||||
|         for violation in violations: | ||||
|             if isinstance(violation, MissingMacOSVersion): | ||||
|                 print_with_indent( | ||||
|                     indent + 3, | ||||
|                     "shared library does not contain macOS version", | ||||
|                 ) | ||||
|             elif isinstance(violation, IncompatibleMacOSVersion): | ||||
|                 print_with_indent( | ||||
|                     indent + 3, | ||||
|                     f"shared library requires macOS {violation.version}", | ||||
|                 ) | ||||
|     else: | ||||
|         print_with_indent(indent, f"🍏 compatible with macOS {macos}") | ||||
|  | ||||
|     return has_issues | ||||
|  | ||||
|  | ||||
| def check_manylinux(object_file: ObjectFile, manylinux: str, indent: int) -> bool: | ||||
|     has_issues = False | ||||
|     violations = object_file.check_manylinux(manylinux) | ||||
|     if violations != []: | ||||
|         has_issues = True | ||||
|         print_with_indent( | ||||
|             indent, | ||||
|             f"⛔ Found symbols that are incompatible with {manylinux}:", | ||||
|         ) | ||||
|  | ||||
|         for violation in violations: | ||||
|             if isinstance(violation, IncompatibleManylinuxSymbol): | ||||
|                 print_with_indent( | ||||
|                     indent + 3, | ||||
|                     f"{violation.name}_{violation.dep}: {violation.version}", | ||||
|                 ) | ||||
|     else: | ||||
|         print_with_indent(indent, f"🐧 {manylinux} compatible") | ||||
|  | ||||
|     return has_issues | ||||
|  | ||||
|  | ||||
| def print_with_indent(indent: int, message: str): | ||||
|     print(f"{' ' * indent}{message}", file=sys.stderr) | ||||
| @ -1,11 +1,10 @@ | ||||
| import argparse | ||||
| import dataclasses | ||||
| import json | ||||
| import re | ||||
| import sys | ||||
| from pathlib import Path | ||||
|  | ||||
| from huggingface_hub import create_repo, upload_folder, create_branch | ||||
| from huggingface_hub import create_repo, upload_folder | ||||
|  | ||||
| from kernels.compat import tomllib | ||||
| from kernels.lockfile import KernelLock, get_kernel_locks | ||||
| @ -14,8 +13,6 @@ from kernels.utils import install_kernel, install_kernel_all_variants | ||||
| from .doc import generate_readme_for_kernel | ||||
| from .wheel import build_variant_to_wheel | ||||
|  | ||||
| BUILD_VARIANT_REGEX = re.compile(r"^(torch\d+\d+|torch-universal)") | ||||
|  | ||||
|  | ||||
| def main(): | ||||
|     parser = argparse.ArgumentParser( | ||||
| @ -23,31 +20,6 @@ def main(): | ||||
|     ) | ||||
|     subparsers = parser.add_subparsers(required=True) | ||||
|  | ||||
|     check_parser = subparsers.add_parser("check", help="Check a kernel for compliance") | ||||
|     check_parser.add_argument("repo_id", type=str, help="The kernel repo ID") | ||||
|     check_parser.add_argument( | ||||
|         "--revision", | ||||
|         type=str, | ||||
|         default="main", | ||||
|         help="The kernel revision (branch, tag, or commit SHA, defaults to 'main')", | ||||
|     ) | ||||
|     check_parser.add_argument("--macos", type=str, help="macOS version", default="15.0") | ||||
|     check_parser.add_argument( | ||||
|         "--manylinux", type=str, help="Manylinux version", default="manylinux_2_28" | ||||
|     ) | ||||
|     check_parser.add_argument( | ||||
|         "--python-abi", type=str, help="Python ABI version", default="3.9" | ||||
|     ) | ||||
|     check_parser.set_defaults( | ||||
|         func=lambda args: check_kernel( | ||||
|             macos=args.macos, | ||||
|             manylinux=args.manylinux, | ||||
|             python_abi=args.python_abi, | ||||
|             repo_id=args.repo_id, | ||||
|             revision=args.revision, | ||||
|         ) | ||||
|     ) | ||||
|  | ||||
|     download_parser = subparsers.add_parser("download", help="Download locked kernels") | ||||
|     download_parser.add_argument( | ||||
|         "project_dir", | ||||
| @ -68,15 +40,10 @@ def main(): | ||||
|         help="Directory of the kernel build", | ||||
|     ) | ||||
|     upload_parser.add_argument( | ||||
|         "--repo-id", | ||||
|         "--repo_id", | ||||
|         type=str, | ||||
|         help="Repository ID to use to upload to the Hugging Face Hub", | ||||
|     ) | ||||
|     upload_parser.add_argument( | ||||
|         "--branch", | ||||
|         type=None, | ||||
|         help="If set, the upload will be made to a particular branch of the provided `repo-id`.", | ||||
|     ) | ||||
|     upload_parser.add_argument( | ||||
|         "--private", | ||||
|         action="store_true", | ||||
| @ -207,31 +174,17 @@ def lock_kernels(args): | ||||
|  | ||||
|  | ||||
| def upload_kernels(args): | ||||
|     # Resolve `kernel_dir` to be uploaded. | ||||
|     kernel_dir = Path(args.kernel_dir).resolve() | ||||
|  | ||||
|     build_dir = None | ||||
|     for candidate in [kernel_dir / "build", kernel_dir]: | ||||
|         variants = [ | ||||
|             variant_path | ||||
|             for variant_path in candidate.glob("torch*") | ||||
|             if BUILD_VARIANT_REGEX.match(variant_path.name) is not None | ||||
|         ] | ||||
|         if variants: | ||||
|             build_dir = candidate | ||||
|             break | ||||
|     if build_dir is None: | ||||
|         raise ValueError( | ||||
|             f"Couldn't find any build variants in: {kernel_dir.absolute()} or {(kernel_dir / 'build').absolute()}" | ||||
|         ) | ||||
|     build_dir = kernel_dir / "build" | ||||
|     if not kernel_dir.is_dir(): | ||||
|         raise ValueError(f"{kernel_dir} is not a directory") | ||||
|     if not build_dir.is_dir(): | ||||
|         raise ValueError("Couldn't find `build` directory inside `kernel_dir`") | ||||
|  | ||||
|     repo_id = create_repo( | ||||
|         repo_id=args.repo_id, private=args.private, exist_ok=True | ||||
|     ).repo_id | ||||
|  | ||||
|     if args.branch is not None: | ||||
|         create_branch(repo_id=repo_id, branch=args.branch, exist_ok=True) | ||||
|  | ||||
|     delete_patterns: set[str] = set() | ||||
|     for build_variant in build_dir.iterdir(): | ||||
|         if build_variant.is_dir(): | ||||
| @ -240,7 +193,6 @@ def upload_kernels(args): | ||||
|     upload_folder( | ||||
|         repo_id=repo_id, | ||||
|         folder_path=build_dir, | ||||
|         revision=args.branch, | ||||
|         path_in_repo="build", | ||||
|         delete_patterns=list(delete_patterns), | ||||
|         commit_message="Build uploaded using `kernels`.", | ||||
| @ -253,24 +205,3 @@ class _JSONEncoder(json.JSONEncoder): | ||||
|         if dataclasses.is_dataclass(o): | ||||
|             return dataclasses.asdict(o) | ||||
|         return super().default(o) | ||||
|  | ||||
|  | ||||
| def check_kernel( | ||||
|     *, macos: str, manylinux: str, python_abi: str, repo_id: str, revision: str | ||||
| ): | ||||
|     try: | ||||
|         import kernels.check | ||||
|     except ImportError: | ||||
|         print( | ||||
|             "`kernels check` requires the `kernel-abi-check` package: pip install kernel-abi-check", | ||||
|             file=sys.stderr, | ||||
|         ) | ||||
|         sys.exit(1) | ||||
|  | ||||
|     kernels.check.check_kernel( | ||||
|         macos=macos, | ||||
|         manylinux=manylinux, | ||||
|         python_abi=python_abi, | ||||
|         repo_id=repo_id, | ||||
|         revision=revision, | ||||
|     ) | ||||
|  | ||||
| @ -111,10 +111,10 @@ def generate_readme_for_kernel(repo_id: str, *, revision: str = "main") -> None: | ||||
| def generate_metadata(module: ModuleType) -> None: | ||||
|     metadata = getattr(module, "__kernel_metadata__", {}) | ||||
|     if "tags" not in metadata: | ||||
|         metadata["tags"] = ["kernels"] | ||||
|         metadata["tags"] = ["kernel"] | ||||
|     else: | ||||
|         if "kernels" not in metadata["tags"]: | ||||
|             metadata["tags"].append("kernels") | ||||
|         if "kernel" not in metadata["tags"]: | ||||
|             metadata["tags"].append("kernel") | ||||
|  | ||||
|     print("---") | ||||
|     print(yaml.dump(metadata), end="") | ||||
|  | ||||
| @ -87,7 +87,7 @@ class Device: | ||||
|  | ||||
|     Args: | ||||
|         type (`str`): | ||||
|             The device type (e.g., "cuda", "mps", "npu", "rocm", "xpu"). | ||||
|             The device type (e.g., "cuda", "mps", "rocm"). | ||||
|         properties ([`CUDAProperties`], *optional*): | ||||
|             Device-specific properties. Currently only [`CUDAProperties`] is supported for CUDA devices. | ||||
|  | ||||
| @ -106,12 +106,6 @@ class Device: | ||||
|  | ||||
|         # MPS device for Apple Silicon | ||||
|         mps_device = Device(type="mps") | ||||
|  | ||||
|         # XPU device (e.g., Intel(R) Data Center GPU Max 1550) | ||||
|         xpu_device = Device(type="xpu") | ||||
|  | ||||
|         # NPU device (Huawei Ascend) | ||||
|         npu_device = Device(type="npu") | ||||
|         ``` | ||||
|     """ | ||||
|  | ||||
| @ -131,10 +125,6 @@ class Device: | ||||
|             return _ROCMRepos() | ||||
|         elif self.type == "mps": | ||||
|             return _MPSRepos() | ||||
|         elif self.type == "xpu": | ||||
|             return _XPURepos() | ||||
|         elif self.type == "npu": | ||||
|             return _NPURepos() | ||||
|         else: | ||||
|             raise ValueError(f"Unknown device type: {self.type}") | ||||
|  | ||||
| @ -321,7 +311,7 @@ class LayerRepository: | ||||
|         return hash((self.layer_name, self._repo_id, self._revision, self._version)) | ||||
|  | ||||
|     def __str__(self) -> str: | ||||
|         return f"`{self._repo_id}` (revision: {self._resolve_revision()}), layer `{self.layer_name}`" | ||||
|         return f"`{self._repo_id}` (revision: {self._resolve_revision()}) for layer `{self.layer_name}`" | ||||
|  | ||||
|  | ||||
| class LocalLayerRepository: | ||||
| @ -377,7 +367,7 @@ class LocalLayerRepository: | ||||
|         return hash((self.layer_name, self._repo_path, self._package_name)) | ||||
|  | ||||
|     def __str__(self) -> str: | ||||
|         return f"`{self._repo_path}` (package: {self._package_name}), layer `{self.layer_name}`" | ||||
|         return f"`{self._repo_path}` (package: {self._package_name}) for layer `{self.layer_name}`" | ||||
|  | ||||
|  | ||||
| class LockedLayerRepository: | ||||
| @ -432,7 +422,7 @@ class LockedLayerRepository: | ||||
|         return hash((self.layer_name, self._repo_id)) | ||||
|  | ||||
|     def __str__(self) -> str: | ||||
|         return f"`{self._repo_id}` (revision: {self._resolve_revision()}), layer `{self.layer_name}`" | ||||
|         return f"`{self._repo_id}` (revision: {self._resolve_revision()}) for layer `{self.layer_name}`" | ||||
|  | ||||
|  | ||||
| _CACHED_LAYER: Dict[LayerRepositoryProtocol, Type["nn.Module"]] = {} | ||||
| @ -457,46 +447,6 @@ class _DeviceRepos(ABC): | ||||
|         ... | ||||
|  | ||||
|  | ||||
| class _XPURepos(_DeviceRepos): | ||||
|     _repos: Dict[Mode, LayerRepositoryProtocol] | ||||
|  | ||||
|     def __init__(self): | ||||
|         super().__init__() | ||||
|         self._repos = {} | ||||
|  | ||||
|     @property | ||||
|     def repos( | ||||
|         self, | ||||
|     ) -> Optional[Dict[Mode, LayerRepositoryProtocol]]: | ||||
|         return self._repos | ||||
|  | ||||
|     def insert(self, device: Device, repos: Dict[Mode, LayerRepositoryProtocol]): | ||||
|         if device.type != "xpu": | ||||
|             raise ValueError(f"Device type must be 'xpu', got {device.type}") | ||||
|  | ||||
|         self._repos = repos | ||||
|  | ||||
|  | ||||
| class _NPURepos(_DeviceRepos): | ||||
|     _repos: Dict[Mode, LayerRepositoryProtocol] | ||||
|  | ||||
|     def __init__(self): | ||||
|         super().__init__() | ||||
|         self._repos = {} | ||||
|  | ||||
|     @property | ||||
|     def repos( | ||||
|         self, | ||||
|     ) -> Optional[Dict[Mode, LayerRepositoryProtocol]]: | ||||
|         return self._repos | ||||
|  | ||||
|     def insert(self, device: Device, repos: Dict[Mode, LayerRepositoryProtocol]): | ||||
|         if device.type != "npu": | ||||
|             raise ValueError(f"Device type must be 'npu', got {device.type}") | ||||
|  | ||||
|         self._repos = repos | ||||
|  | ||||
|  | ||||
| class _MPSRepos(_DeviceRepos): | ||||
|     _repos: Dict[Mode, LayerRepositoryProtocol] | ||||
|  | ||||
| @ -581,7 +531,7 @@ class _ROCMRepos(_DeviceRepos): | ||||
|  | ||||
| def _validate_device_type(device_type: str) -> None: | ||||
|     """Validate that the device type is supported.""" | ||||
|     supported_devices = {"cuda", "mps", "npu", "rocm", "xpu"} | ||||
|     supported_devices = {"cuda", "rocm", "mps"} | ||||
|     if device_type not in supported_devices: | ||||
|         raise ValueError( | ||||
|             f"Unsupported device type '{device_type}'. Supported device types are: {', '.join(sorted(supported_devices))}" | ||||
| @ -839,7 +789,7 @@ def kernelize( | ||||
|             `Mode.TRAINING | Mode.TORCH_COMPILE` kernelizes the model for training with | ||||
|             `torch.compile`. | ||||
|         device (`Union[str, torch.device]`, *optional*): | ||||
|             The device type to load kernels for. Supported device types are: "cuda", "mps", "npu", "rocm", "xpu". | ||||
|             The device type to load kernels for. Supported device types are: "cuda", "mps", "rocm". | ||||
|             The device type will be inferred from the model parameters when not provided. | ||||
|         use_fallback (`bool`, *optional*, defaults to `True`): | ||||
|             Whether to use the original forward method of modules when no compatible kernel could be found. | ||||
| @ -863,7 +813,7 @@ def kernelize( | ||||
|                 return F.silu(x[..., :d]) * x[..., d:] | ||||
|  | ||||
|         mapping = { | ||||
|             "SiluAndMul": { | ||||
|             "LayerNorm": { | ||||
|                 "cuda": LayerRepository( | ||||
|                     repo_id="kernels-community/activation", | ||||
|                     layer_name="SiluAndMul", | ||||
| @ -1045,7 +995,7 @@ def _get_kernel_layer(repo: LayerRepositoryProtocol) -> Type["nn.Module"]: | ||||
|     return layer | ||||
|  | ||||
|  | ||||
| def _validate_layer(*, check_cls, cls, repo: LayerRepositoryProtocol): | ||||
| def _validate_layer(*, check_cls, cls): | ||||
|     import torch.nn as nn | ||||
|  | ||||
|     # The layer must have at least have the following properties: (1) it | ||||
| @ -1054,12 +1004,12 @@ def _validate_layer(*, check_cls, cls, repo: LayerRepositoryProtocol): | ||||
|     # methods. | ||||
|  | ||||
|     if not issubclass(cls, nn.Module): | ||||
|         raise TypeError(f"Layer `{cls.__name__}` is not a Torch layer.") | ||||
|         raise TypeError(f"Layer `{cls}` is not a Torch layer.") | ||||
|  | ||||
|     # We verify statelessness by checking that the does not have its own | ||||
|     # constructor (since the constructor could add member variables)... | ||||
|     if cls.__init__ is not nn.Module.__init__: | ||||
|         raise TypeError(f"{repo} must not override nn.Module constructor.") | ||||
|         raise TypeError("Layer must not override nn.Module constructor.") | ||||
|  | ||||
|     # ... or predefined member variables. | ||||
|     torch_module_members = {name for name, _ in inspect.getmembers(nn.Module)} | ||||
| @ -1067,9 +1017,7 @@ def _validate_layer(*, check_cls, cls, repo: LayerRepositoryProtocol): | ||||
|     difference = cls_members - torch_module_members | ||||
|     # verify if : difference ⊄ {"can_torch_compile", "has_backward"} | ||||
|     if not difference <= {"can_torch_compile", "has_backward"}: | ||||
|         raise TypeError( | ||||
|             f"{repo} must not contain additional members compared to `{check_cls.__name__}`." | ||||
|         ) | ||||
|         raise TypeError("Layer must not contain additional members.") | ||||
|  | ||||
|     # Check whether the forward signatures are similar. | ||||
|     params = inspect.signature(cls.forward).parameters | ||||
| @ -1077,13 +1025,13 @@ def _validate_layer(*, check_cls, cls, repo: LayerRepositoryProtocol): | ||||
|  | ||||
|     if len(params) != len(ref_params): | ||||
|         raise TypeError( | ||||
|             f"Forward signature of {repo} does not match `{check_cls.__name__}`: different number of arguments." | ||||
|             "Forward signature does not match: different number of arguments." | ||||
|         ) | ||||
|  | ||||
|     for param, ref_param in zip(params.values(), ref_params.values()): | ||||
|         if param.kind != ref_param.kind: | ||||
|             raise TypeError( | ||||
|                 f"Forward signature of {repo} does not match `{check_cls.__name__}`: different kind of arguments ({param} ({param.kind}) and {ref_param} ({ref_param.kind})" | ||||
|                 f"Forward signature does not match: different kind of arguments ({param} ({param.kind}) and {ref_param} ({ref_param.kind})" | ||||
|             ) | ||||
|  | ||||
|  | ||||
| @ -1200,7 +1148,7 @@ def _get_layer_memoize( | ||||
|         return layer | ||||
|  | ||||
|     layer = _get_kernel_layer(repo) | ||||
|     _validate_layer(check_cls=module_class, cls=layer, repo=repo) | ||||
|     _validate_layer(check_cls=module_class, cls=layer) | ||||
|     _CACHED_LAYER[repo] = layer | ||||
|  | ||||
|     return layer | ||||
|  | ||||
| @ -11,7 +11,7 @@ import sys | ||||
| from importlib.metadata import Distribution | ||||
| from pathlib import Path | ||||
| from types import ModuleType | ||||
| from typing import Dict, List, Optional, Tuple, Union | ||||
| from typing import Dict, List, Optional, Tuple | ||||
|  | ||||
| from huggingface_hub import file_exists, snapshot_download | ||||
| from packaging.version import parse | ||||
| @ -19,8 +19,6 @@ from packaging.version import parse | ||||
| from kernels._versions import select_revision_or_version | ||||
| from kernels.lockfile import KernelLock, VariantLock | ||||
|  | ||||
| ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"} | ||||
|  | ||||
|  | ||||
| def _get_cache_dir() -> Optional[str]: | ||||
|     """Returns the kernels cache directory.""" | ||||
| @ -37,14 +35,6 @@ def _get_cache_dir() -> Optional[str]: | ||||
| CACHE_DIR: Optional[str] = _get_cache_dir() | ||||
|  | ||||
|  | ||||
| def _get_privateuse_backend_name() -> Optional[str]: | ||||
|     import torch | ||||
|  | ||||
|     if hasattr(torch._C, "_get_privateuse1_backend_name"): | ||||
|         return torch._C._get_privateuse1_backend_name() | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def build_variant() -> str: | ||||
|     import torch | ||||
|  | ||||
| @ -56,17 +46,11 @@ def build_variant() -> str: | ||||
|         compute_framework = f"rocm{rocm_version.major}{rocm_version.minor}" | ||||
|     elif torch.backends.mps.is_available(): | ||||
|         compute_framework = "metal" | ||||
|     elif hasattr(torch.version, "xpu") and torch.version.xpu is not None: | ||||
|         version = torch.version.xpu | ||||
|         compute_framework = f"xpu{version[0:4]}{version[5:6]}" | ||||
|     elif _get_privateuse_backend_name() == "npu": | ||||
|         from torch_npu.utils.collect_env import get_cann_version  # type: ignore[import-not-found] | ||||
|  | ||||
|         cann_major, cann_minor = get_cann_version()[0], get_cann_version()[2] | ||||
|         compute_framework = f"cann{cann_major}{cann_minor}" | ||||
|     elif hasattr(torch, "xpu") and torch.xpu.is_available(): | ||||
|         compute_framework = "xpu" | ||||
|     else: | ||||
|         raise AssertionError( | ||||
|             "Torch was not compiled with CUDA, Metal, XPU, NPU, or ROCm enabled." | ||||
|             "Torch was not compiled with CUDA, Metal, XPU, or ROCm enabled." | ||||
|         ) | ||||
|  | ||||
|     torch_version = parse(torch.__version__) | ||||
| @ -110,7 +94,6 @@ def install_kernel( | ||||
|     revision: str, | ||||
|     local_files_only: bool = False, | ||||
|     variant_locks: Optional[Dict[str, VariantLock]] = None, | ||||
|     user_agent: Optional[Union[str, dict]] = None, | ||||
| ) -> Tuple[str, Path]: | ||||
|     """ | ||||
|     Download a kernel for the current environment to the cache. | ||||
| @ -126,8 +109,6 @@ def install_kernel( | ||||
|             Whether to only use local files and not download from the Hub. | ||||
|         variant_locks (`Dict[str, VariantLock]`, *optional*): | ||||
|             Optional dictionary of variant locks for validation. | ||||
|         user_agent (`Union[str, dict]`, *optional*): | ||||
|             The `user_agent` info to pass to `snapshot_download()` for internal telemetry. | ||||
|  | ||||
|     Returns: | ||||
|         `Tuple[str, Path]`: A tuple containing the package name and the path to the variant directory. | ||||
| @ -135,7 +116,6 @@ def install_kernel( | ||||
|     package_name = package_name_from_repo_id(repo_id) | ||||
|     variant = build_variant() | ||||
|     universal_variant = universal_build_variant() | ||||
|     user_agent = _get_user_agent(user_agent=user_agent) | ||||
|     repo_path = Path( | ||||
|         snapshot_download( | ||||
|             repo_id, | ||||
| @ -143,7 +123,6 @@ def install_kernel( | ||||
|             cache_dir=CACHE_DIR, | ||||
|             revision=revision, | ||||
|             local_files_only=local_files_only, | ||||
|             user_agent=user_agent, | ||||
|         ) | ||||
|     ) | ||||
|  | ||||
| @ -220,10 +199,7 @@ def install_kernel_all_variants( | ||||
|  | ||||
|  | ||||
| def get_kernel( | ||||
|     repo_id: str, | ||||
|     revision: Optional[str] = None, | ||||
|     version: Optional[str] = None, | ||||
|     user_agent: Optional[Union[str, dict]] = None, | ||||
|     repo_id: str, revision: Optional[str] = None, version: Optional[str] = None | ||||
| ) -> ModuleType: | ||||
|     """ | ||||
|     Load a kernel from the kernel hub. | ||||
| @ -239,8 +215,6 @@ def get_kernel( | ||||
|         version (`str`, *optional*): | ||||
|             The kernel version to download. This can be a Python version specifier, such as `">=1.0.0,<2.0.0"`. | ||||
|             Cannot be used together with `revision`. | ||||
|         user_agent (`Union[str, dict]`, *optional*): | ||||
|             The `user_agent` info to pass to `snapshot_download()` for internal telemetry. | ||||
|  | ||||
|     Returns: | ||||
|         `ModuleType`: The imported kernel module. | ||||
| @ -257,9 +231,7 @@ def get_kernel( | ||||
|         ``` | ||||
|     """ | ||||
|     revision = select_revision_or_version(repo_id, revision, version) | ||||
|     package_name, package_path = install_kernel( | ||||
|         repo_id, revision=revision, user_agent=user_agent | ||||
|     ) | ||||
|     package_name, package_path = install_kernel(repo_id, revision=revision) | ||||
|     return import_from_path(package_name, package_path / package_name / "__init__.py") | ||||
|  | ||||
|  | ||||
| @ -515,29 +487,3 @@ def git_hash_object(data: bytes, object_type: str = "blob"): | ||||
|  | ||||
| def package_name_from_repo_id(repo_id: str) -> str: | ||||
|     return repo_id.split("/")[-1].replace("-", "_") | ||||
|  | ||||
|  | ||||
| def _get_user_agent( | ||||
|     user_agent: Optional[Union[dict, str]] = None, | ||||
| ) -> Union[None, dict, str]: | ||||
|     import torch | ||||
|  | ||||
|     from . import __version__ | ||||
|  | ||||
|     if os.getenv("DISABLE_TELEMETRY", "false").upper() in ENV_VARS_TRUE_VALUES: | ||||
|         return None | ||||
|  | ||||
|     if user_agent is None: | ||||
|         user_agent = {} | ||||
|     if isinstance(user_agent, dict): | ||||
|         user_agent.update( | ||||
|             { | ||||
|                 "kernels": __version__, | ||||
|                 "torch": torch.__version__, | ||||
|                 "build_variant": build_variant(), | ||||
|                 "file_type": "kernel", | ||||
|             } | ||||
|         ) | ||||
|     elif isinstance(user_agent, str): | ||||
|         user_agent += f"; kernels/{__version__}; torch/{torch.__version__}; build_variant/{build_variant()}; file_type/kernel" | ||||
|     return user_agent | ||||
|  | ||||
| @ -3,8 +3,6 @@ import sys | ||||
| import pytest | ||||
| import torch | ||||
|  | ||||
| from kernels.utils import _get_privateuse_backend_name | ||||
|  | ||||
| has_cuda = ( | ||||
|     hasattr(torch.version, "cuda") | ||||
|     and torch.version.cuda is not None | ||||
| @ -15,20 +13,6 @@ has_rocm = ( | ||||
|     and torch.version.hip is not None | ||||
|     and torch.cuda.device_count() > 0 | ||||
| ) | ||||
| has_xpu = ( | ||||
|     hasattr(torch.version, "xpu") | ||||
|     and torch.version.xpu is not None | ||||
|     and torch.xpu.device_count() > 0 | ||||
| ) | ||||
| has_npu = _get_privateuse_backend_name() == "npu" | ||||
|  | ||||
|  | ||||
| def pytest_addoption(parser): | ||||
|     parser.addoption( | ||||
|         "--token", | ||||
|         action="store_true", | ||||
|         help="run tests that require a token with write permissions", | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def pytest_runtest_setup(item): | ||||
| @ -38,9 +22,3 @@ def pytest_runtest_setup(item): | ||||
|         pytest.skip("skipping ROCm-only test on host without ROCm") | ||||
|     if "darwin_only" in item.keywords and not sys.platform.startswith("darwin"): | ||||
|         pytest.skip("skipping macOS-only test on non-macOS platform") | ||||
|     if "xpu_only" in item.keywords and not has_xpu: | ||||
|         pytest.skip("skipping XPU-only test on host without XPU") | ||||
|     if "npu_only" in item.keywords and not has_npu: | ||||
|         pytest.skip("skipping NPU-only test on host without NPU") | ||||
|     if "token" in item.keywords and not item.config.getoption("--token"): | ||||
|         pytest.skip("need --token option to run this test") | ||||
|  | ||||
| @ -1,70 +1,82 @@ | ||||
| [ | ||||
|   { | ||||
|     "repo_id": "kernels-community/activation", | ||||
|     "sha": "83046852be158d525114f68513cd79fd88911b37", | ||||
|     "sha": "fd6842e88f1f23f198551d78a4541b8eb07e0538", | ||||
|     "variants": { | ||||
|       "torch25-cxx11-cu118-x86_64-linux": { | ||||
|         "hash": "sha256-61e3e51b5b59b30d4a6ba943a5e6e4ef5a9c8260cc4bca40b9fb462c0777842b", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch25-cxx11-cu121-x86_64-linux": { | ||||
|         "hash": "sha256-baa6b872040730bd1d676c011381f6f626fb96189837b828f587c806af8994fa", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch25-cxx11-cu124-x86_64-linux": { | ||||
|         "hash": "sha256-c1ec7457847fa1f0e4ab43234dfc3cd0959977e03dc2ffe89b4f6b90970c7965", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch25-cxx98-cu118-x86_64-linux": { | ||||
|         "hash": "sha256-412f9c841f20741e42f2c6cdb8c7da0e33ab436b219975acffe18b62b97ecd7c", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch25-cxx98-cu121-x86_64-linux": { | ||||
|         "hash": "sha256-2fde7f97859506e000c1072b3916c0a75bc8cee750a9853ea8b68199e7b57bcd", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch25-cxx98-cu124-x86_64-linux": { | ||||
|         "hash": "sha256-93309986f39a64a5630378108154866f0545178fa8dfef9b8f8ccfef9a78608e", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch26-cxx11-cu118-x86_64-linux": { | ||||
|         "hash": "sha256-3284d3c64b76d92c1ee930bce8013aff307f16eefb16c2d5dea9f2ca70e71e1f", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch26-cxx11-cu124-x86_64-linux": { | ||||
|         "hash": "sha256-36a8c93773c08ddf8ef624a8a6b2866be26d1861450dfe1ecac0bed59f9ffa47", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch26-cxx11-cu126-aarch64-linux": { | ||||
|         "hash": "sha256-f5afb734520f587717665659798ff738a69e5ae1e34d4bd95624edd18fb165cd", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch26-cxx11-cu126-x86_64-linux": { | ||||
|         "hash": "sha256-940841a7cb44f76c9a896d8b39f5bc0e0420f1c4c05ae9423da96778de4d1f2c", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch26-cxx98-cu118-x86_64-linux": { | ||||
|         "hash": "sha256-8e0f907830c3acc8c6bebfc162c744012ff6973e8110d7bf8ecd74b492418204", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch26-cxx98-cu124-x86_64-linux": { | ||||
|         "hash": "sha256-0833414cbe658baec55b7ff63537cddccc973fe99e3c03008cced5e66e38b6c1", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch26-cxx98-cu126-aarch64-linux": { | ||||
|         "hash": "sha256-d94fa59a13a5b623b2071aadcd1e6c8477c4d557fd06ad144f15b46b1fc71aab", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch26-cxx98-cu126-x86_64-linux": { | ||||
|         "hash": "sha256-64784f5f2f9e232d0f2fd824fbc47eadde505e3c232f351bead5b04c429c65c2", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch27-cxx11-cu118-x86_64-linux": { | ||||
|         "hash": "sha256-e34965c814c4c092fcb634ebadefe82ea9a05b98343f8ebdefa7305dcc05359e", | ||||
|         "hash": "sha256-bcba3765f061649bac0e5a9159bea8349ced4780e24a2330aa62ce0f8d3a9d78", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch27-cxx11-cu126-aarch64-linux": { | ||||
|         "hash": "sha256-e4625df5706af025c70bd824d952b928d9a2965eeaefda72fc47be0fae680c5e", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch27-cxx11-cu126-x86_64-linux": { | ||||
|         "hash": "sha256-5f92b35922b37224a416398a39a29b7e5f1aca1df17d5c69f1b9e9cdb7033561", | ||||
|         "hash": "sha256-7d7d3e655f34a7b03d5603d7c1ab723ef3efc823291762421a8b3a4aa51bd405", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch27-cxx11-cu128-aarch64-linux": { | ||||
|         "hash": "sha256-125967cb23bacd2cec443799f184ac08247dfff33f5027e54ee16d3779ca5986", | ||||
|         "hash": "sha256-60e076194dcd55b32c5aca72f09816cba0fff52f340c8a063b17ff0577154d99", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch27-cxx11-cu128-x86_64-linux": { | ||||
|         "hash": "sha256-496a84c99d7035a1b6f0ea1c026b751c3a2677956f4c1be546d3cc1505a5fdbb", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch28-cxx11-cu126-aarch64-linux": { | ||||
|         "hash": "sha256-f0775a30ffa290c90aba3a41037e3ca91edb15b4a9367561fafd5f25455e117a", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch28-cxx11-cu126-x86_64-linux": { | ||||
|         "hash": "sha256-081995e6230f306bdf6111186618794f2411cf0ffd9b4800330df60b4ebe1927", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch28-cxx11-cu128-aarch64-linux": { | ||||
|         "hash": "sha256-b937fef62a0c1cd71ab98490b651c473577af209b9a3e2a6b452350283d8812c", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch28-cxx11-cu128-x86_64-linux": { | ||||
|         "hash": "sha256-a3915686cc58641a3361ece63ab77b33e9d30315dea12547e4bda008d8810a01", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch28-cxx11-cu129-aarch64-linux": { | ||||
|         "hash": "sha256-a24dca8e998f88be42491921c9df89d88a6112ca630acd2efc2dd34a64b91fcb", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch28-cxx11-cu129-x86_64-linux": { | ||||
|         "hash": "sha256-df6c70a70f425db2f68b86561c6f93c5675c1d5e5d058766d88ab17472229907", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch29-cxx11-cu126-aarch64-linux": { | ||||
|         "hash": "sha256-c120011c201072b4cfd70c2ba2d45c2f05337feaf604ddec3c6c4987def33ab3", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch29-cxx11-cu126-x86_64-linux": { | ||||
|         "hash": "sha256-765a7f3279009979be4001a23c5c70e5e6ab9553098d67886731a5275a6d4b32", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch29-cxx11-cu128-aarch64-linux": { | ||||
|         "hash": "sha256-266d057a9cd82b872a0e02f09ac5e2660fcffcf9a7b7fa1fa8ff33dc19c0f5c2", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch29-cxx11-cu128-x86_64-linux": { | ||||
|         "hash": "sha256-6850e594ba4588f289b5904eb88eda5a41870ee20a3bf1586f3268307caf4b53", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch29-cxx11-cu130-aarch64-linux": { | ||||
|         "hash": "sha256-23741b935462b53bdf868f8d1c9c8cff5f02f71ea3b0550df41dc8b030b0b474", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       }, | ||||
|       "torch29-cxx11-cu130-x86_64-linux": { | ||||
|         "hash": "sha256-b884ae792dc1eada071f31645add0c2c76d479864f25aebcdd8318b675aaaf29", | ||||
|         "hash": "sha256-f0a3802382efdcd78b40601187a9c416579a24ef2ed5a60d2296ef0951a89597", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       } | ||||
|     } | ||||
|  | ||||
| @ -35,7 +35,6 @@ def test_load_locked(): | ||||
|     load_kernel("kernels-community/activation", lockfile=project_dir / "kernels.lock") | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| def test_layer_locked(): | ||||
|     project_dir = Path(__file__).parent / "layer_locking" | ||||
|  | ||||
|  | ||||
| @ -6,13 +6,11 @@ from dataclasses import dataclass | ||||
| from pathlib import Path | ||||
| from typing import List | ||||
|  | ||||
| import pytest | ||||
| from huggingface_hub import delete_repo, model_info, list_repo_refs | ||||
| from huggingface_hub import model_info | ||||
|  | ||||
| from kernels.cli import upload_kernels | ||||
|  | ||||
| REPO_ID = "valid_org/kernels-upload-test" | ||||
|  | ||||
| REPO_ID = "kernels-test/kernels-upload-test" | ||||
|  | ||||
| PY_CONTENT = """\ | ||||
| #!/usr/bin/env python3 | ||||
| @ -30,7 +28,6 @@ class UploadArgs: | ||||
|     kernel_dir: None | ||||
|     repo_id: None | ||||
|     private: False | ||||
|     branch: None | ||||
|  | ||||
|  | ||||
| def next_filename(path: Path) -> Path: | ||||
| @ -69,39 +66,7 @@ def get_filenames_from_a_repo(repo_id: str) -> List[str]: | ||||
|         logging.error(f"Error connecting to the Hub: {e}.") | ||||
|  | ||||
|  | ||||
| @pytest.mark.token | ||||
| @pytest.mark.is_staging_test | ||||
| @pytest.mark.parametrize("branch", (None, "foo")) | ||||
| def test_kernel_upload_works_as_expected(branch): | ||||
|     with tempfile.TemporaryDirectory() as tmpdir: | ||||
|         path = f"{tmpdir}/build/torch-universal/upload_test" | ||||
|         build_dir = Path(path) | ||||
|         build_dir.mkdir(parents=True, exist_ok=True) | ||||
|         script_path = build_dir / "foo.py" | ||||
|         script_path.write_text(PY_CONTENT) | ||||
|         upload_kernels(UploadArgs(tmpdir, REPO_ID, False, branch)) | ||||
|  | ||||
|     repo_filenames = get_filenames_from_a_repo(REPO_ID) | ||||
|     assert any(str(script_path.name) for f in repo_filenames) | ||||
|  | ||||
|     if branch is not None: | ||||
|         refs = list_repo_refs(repo_id=REPO_ID) | ||||
|         assert any(ref_branch.name == branch for ref_branch in refs.branches) | ||||
|  | ||||
|     delete_repo(repo_id=REPO_ID) | ||||
|  | ||||
|  | ||||
| @pytest.mark.token | ||||
| @pytest.mark.is_staging_test | ||||
| def test_kernel_upload_deletes_as_expected(): | ||||
|     with tempfile.TemporaryDirectory() as tmpdir: | ||||
|         path = f"{tmpdir}/build/torch-universal/upload_test" | ||||
|         build_dir = Path(path) | ||||
|         build_dir.mkdir(parents=True, exist_ok=True) | ||||
|         script_path = build_dir / "foo_2025.py" | ||||
|         script_path.write_text(PY_CONTENT) | ||||
|         upload_kernels(UploadArgs(tmpdir, REPO_ID, False, None)) | ||||
|  | ||||
|     repo_filenames = get_filenames_from_a_repo(REPO_ID) | ||||
|     filename_to_change = get_filename_to_change(repo_filenames) | ||||
|  | ||||
| @ -112,11 +77,10 @@ def test_kernel_upload_deletes_as_expected(): | ||||
|         changed_filename = next_filename(Path(filename_to_change)) | ||||
|         script_path = build_dir / changed_filename | ||||
|         script_path.write_text(PY_CONTENT) | ||||
|         upload_kernels(UploadArgs(tmpdir, REPO_ID, False, None)) | ||||
|         upload_kernels(UploadArgs(tmpdir, REPO_ID, False)) | ||||
|  | ||||
|     repo_filenames = get_filenames_from_a_repo(REPO_ID) | ||||
|     assert any(str(changed_filename) in k for k in repo_filenames), f"{repo_filenames=}" | ||||
|     assert not any( | ||||
|         str(filename_to_change) in k for k in repo_filenames | ||||
|     ), f"{repo_filenames=}" | ||||
|     delete_repo(repo_id=REPO_ID) | ||||
|  | ||||
| @ -21,21 +21,14 @@ from kernels.layer import ( | ||||
|     _KERNEL_MAPPING, | ||||
|     _validate_layer, | ||||
| ) | ||||
| from kernels.utils import ( | ||||
|     _get_privateuse_backend_name, | ||||
|     install_kernel, | ||||
| ) | ||||
| from kernels.utils import install_kernel | ||||
|  | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         Device(type="cuda"): LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|         ), | ||||
|         "npu": LayerRepository( | ||||
|             repo_id="kernels-ext-npu/SwiGlu", | ||||
|             layer_name="SwiGlu", | ||||
|         ), | ||||
|         ) | ||||
|     }, | ||||
|     "SiluAndMulNoCompile": { | ||||
|         "cuda": LayerRepository( | ||||
| @ -53,37 +46,11 @@ kernel_layer_mapping = { | ||||
|             layer_name="SiluAndMul", | ||||
|         ) | ||||
|     }, | ||||
|     "LigerRMSNorm": { | ||||
|         "xpu": LayerRepository( | ||||
|             repo_id="kernels-community/liger_kernels", | ||||
|             layer_name="LigerRMSNorm",  # Triton | ||||
|         ) | ||||
|     }, | ||||
| } | ||||
|  | ||||
| register_kernel_mapping(kernel_layer_mapping) | ||||
|  | ||||
|  | ||||
| class RMSNorm(nn.Module): | ||||
|     def __init__(self, weight: torch.Tensor, eps: float = 1e-6): | ||||
|         super().__init__() | ||||
|         # Used to check that we called hub kernel. | ||||
|         self.n_calls = 0 | ||||
|         self.weight = nn.Parameter(weight) | ||||
|         self.variance_epsilon = eps | ||||
|  | ||||
|     def forward(self, x: torch.Tensor): | ||||
|         self.n_calls += 1 | ||||
|         var = x.pow(2).mean(-1, keepdim=True) | ||||
|         x_norm = x * torch.rsqrt(var + self.variance_epsilon) | ||||
|         return x_norm * self.weight | ||||
|  | ||||
|  | ||||
| @use_kernel_forward_from_hub("LigerRMSNorm") | ||||
| class RMSNormWithKernel(RMSNorm): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class SiluAndMul(nn.Module): | ||||
|     def __init__(self): | ||||
|         super().__init__() | ||||
| @ -123,18 +90,6 @@ class TorchLinearWithCounter(nn.Linear): | ||||
|         return super().forward(input) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def device(): | ||||
|     if torch.cuda.is_available(): | ||||
|         return "cuda" | ||||
|     elif hasattr(torch, "xpu") and torch.xpu.is_available(): | ||||
|         return "xpu" | ||||
|     elif _get_privateuse_backend_name() == "npu": | ||||
|         return "npu" | ||||
|  | ||||
|     pytest.skip("No CUDA, NPU or XPU") | ||||
|  | ||||
|  | ||||
| def test_arg_kinds(): | ||||
|     @use_kernel_forward_from_hub("ArgKind") | ||||
|     class ArgKind(nn.Module): | ||||
| @ -192,54 +147,6 @@ def test_hub_forward_rocm(): | ||||
|     assert silu_and_mul_with_kernel.n_calls in [0, 1] | ||||
|  | ||||
|  | ||||
| @pytest.mark.xpu_only | ||||
| def test_hub_forward_xpu(): | ||||
|     torch.manual_seed(0) | ||||
|  | ||||
|     hidden_size = 1024 | ||||
|     weight = torch.ones(hidden_size, device="xpu") | ||||
|     rms_norm = RMSNorm(weight).to("xpu") | ||||
|     X = torch.randn(4, 16, hidden_size, device="xpu", dtype=torch.float32) | ||||
|     Y = rms_norm(X) | ||||
|  | ||||
|     rms_norm_with_kernel = kernelize( | ||||
|         RMSNormWithKernel(weight), mode=Mode.INFERENCE, device="xpu" | ||||
|     ) | ||||
|     Y_kernel = rms_norm_with_kernel(X) | ||||
|  | ||||
|     torch.testing.assert_close(Y_kernel, Y) | ||||
|  | ||||
|     assert rms_norm.n_calls == 1 | ||||
|     assert rms_norm_with_kernel.n_calls == 0 | ||||
|  | ||||
|  | ||||
| @pytest.mark.npu_only | ||||
| def test_hub_forward_npu(): | ||||
|     torch.manual_seed(0) | ||||
|  | ||||
|     silu_and_mul = SiluAndMul() | ||||
|     X = torch.randn((32, 64), device="npu") | ||||
|     Y = silu_and_mul(X) | ||||
|  | ||||
|     silu_and_mul_with_kernel = kernelize( | ||||
|         SiluAndMulWithKernel(), device="npu", mode=Mode.INFERENCE | ||||
|     ) | ||||
|     Y_kernel = silu_and_mul_with_kernel(X) | ||||
|  | ||||
|     torch.testing.assert_close(Y_kernel, Y) | ||||
|  | ||||
|     assert silu_and_mul.n_calls == 1 | ||||
|     assert silu_and_mul_with_kernel.n_calls == 0 | ||||
|  | ||||
|  | ||||
| @pytest.mark.skipif( | ||||
|     hasattr(torch, "xpu") and getattr(torch.xpu, "is_available", lambda: False)(), | ||||
|     reason="Skip on xpu devices", | ||||
| ) | ||||
| @pytest.mark.skipif( | ||||
|     _get_privateuse_backend_name() == "npu", | ||||
|     reason="Skip on npu devices", | ||||
| ) | ||||
| def test_rocm_kernel_mapping(): | ||||
|     """Test that ROCm shorthand device mapping works correctly.""" | ||||
|     kernel_layer_mapping = { | ||||
| @ -327,16 +234,16 @@ def test_layer_fallback_works(): | ||||
|     kernelize(silu_and_mul, device="cuda", mode=Mode.INFERENCE) | ||||
|  | ||||
|  | ||||
| def test_local_layer_repo(device): | ||||
| def test_local_layer_repo(): | ||||
|     # Fetch a kernel to the local cache. | ||||
|     package_name, path = install_kernel("kernels-test/backward-marker-test", "main") | ||||
|  | ||||
|     linear = TorchLinearWithCounter(32, 32).to(device) | ||||
|     linear = TorchLinearWithCounter(32, 32).to("cuda") | ||||
|  | ||||
|     with use_kernel_mapping( | ||||
|         { | ||||
|             "Linear": { | ||||
|                 device: LocalLayerRepository( | ||||
|                 "cuda": LocalLayerRepository( | ||||
|                     # install_kernel will give the fully-resolved path. | ||||
|                     repo_path=path.parent.parent, | ||||
|                     package_name=package_name, | ||||
| @ -348,7 +255,7 @@ def test_local_layer_repo(device): | ||||
|     ): | ||||
|         kernelize(linear, mode=Mode.INFERENCE) | ||||
|  | ||||
|     X = torch.randn(10, 32, device=device) | ||||
|     X = torch.randn(10, 32, device="cuda") | ||||
|     linear(X) | ||||
|     assert linear.n_calls == 0 | ||||
|  | ||||
| @ -416,7 +323,6 @@ def test_mapping_contexts(): | ||||
|         "SiluAndMul", | ||||
|         "SiluAndMulStringDevice", | ||||
|         "SiluAndMulNoCompile", | ||||
|         "LigerRMSNorm", | ||||
|     } | ||||
|  | ||||
|     extra_mapping1 = { | ||||
| @ -434,7 +340,6 @@ def test_mapping_contexts(): | ||||
|             "SiluAndMul", | ||||
|             "SiluAndMulStringDevice", | ||||
|             "SiluAndMulNoCompile", | ||||
|             "LigerRMSNorm", | ||||
|             "TestKernel", | ||||
|         } | ||||
|  | ||||
| @ -453,7 +358,6 @@ def test_mapping_contexts(): | ||||
|                 "SiluAndMul", | ||||
|                 "SiluAndMulStringDevice", | ||||
|                 "SiluAndMulNoCompile", | ||||
|                 "LigerRMSNorm", | ||||
|                 "TestKernel", | ||||
|             } | ||||
|             assert ( | ||||
| @ -467,7 +371,6 @@ def test_mapping_contexts(): | ||||
|             "SiluAndMul", | ||||
|             "SiluAndMulStringDevice", | ||||
|             "SiluAndMulNoCompile", | ||||
|             "LigerRMSNorm", | ||||
|             "TestKernel", | ||||
|         } | ||||
|         assert ( | ||||
| @ -490,7 +393,6 @@ def test_mapping_contexts(): | ||||
|             "SiluAndMul", | ||||
|             "SiluAndMulStringDevice", | ||||
|             "SiluAndMulNoCompile", | ||||
|             "LigerRMSNorm", | ||||
|             "TestKernel", | ||||
|         } | ||||
|         assert ( | ||||
| @ -502,7 +404,6 @@ def test_mapping_contexts(): | ||||
|         "SiluAndMul", | ||||
|         "SiluAndMulStringDevice", | ||||
|         "SiluAndMulNoCompile", | ||||
|         "LigerRMSNorm", | ||||
|     } | ||||
|  | ||||
|  | ||||
| @ -512,43 +413,26 @@ def test_validate_kernel_layer(): | ||||
|             super().__init__(*args, **kwargs) | ||||
|             self.foo = 42 | ||||
|  | ||||
|     def stub_repo(layer): | ||||
|         return LayerRepository( | ||||
|             repo_id="kernels-test/nonexisting", layer_name=layer.__name__ | ||||
|         ) | ||||
|  | ||||
|     with pytest.raises( | ||||
|         TypeError, | ||||
|         match="`kernels-test/nonexisting`.*layer `BadLayer` must not override", | ||||
|     ): | ||||
|         _validate_layer(cls=BadLayer, check_cls=SiluAndMul, repo=stub_repo(BadLayer)) | ||||
|     with pytest.raises(TypeError, match="not override"): | ||||
|         _validate_layer(cls=BadLayer, check_cls=SiluAndMul) | ||||
|  | ||||
|     class BadLayer2(nn.Module): | ||||
|         foo: int = 42 | ||||
|  | ||||
|     with pytest.raises( | ||||
|         TypeError, | ||||
|         match="`kernels-test/nonexisting`.*layer `BadLayer2` must not contain.*SiluAndMul", | ||||
|     ): | ||||
|         _validate_layer(cls=BadLayer2, check_cls=SiluAndMul, repo=stub_repo(BadLayer2)) | ||||
|     with pytest.raises(TypeError, match="not contain additional members"): | ||||
|         _validate_layer(cls=BadLayer2, check_cls=SiluAndMul) | ||||
|  | ||||
|     class BadLayer3(nn.Module): | ||||
|         def forward(self, x: torch.Tensor, foo: int) -> torch.Tensor: ... | ||||
|  | ||||
|     with pytest.raises( | ||||
|         TypeError, | ||||
|         match="Forward.*`kernels-test/nonexisting`.*layer `BadLayer3` does not match `SiluAndMul`: different number of arguments", | ||||
|     ): | ||||
|         _validate_layer(cls=BadLayer3, check_cls=SiluAndMul, repo=stub_repo(BadLayer3)) | ||||
|     with pytest.raises(TypeError, match="different number of arguments"): | ||||
|         _validate_layer(cls=BadLayer3, check_cls=SiluAndMul) | ||||
|  | ||||
|     class BadLayer4(nn.Module): | ||||
|         def forward(self, *, x: torch.Tensor) -> torch.Tensor: ... | ||||
|  | ||||
|     with pytest.raises( | ||||
|         TypeError, | ||||
|         match="Forward.*`kernels-test/nonexisting`.*layer `BadLayer4` does not match `SiluAndMul`: different kind of arguments", | ||||
|     ): | ||||
|         _validate_layer(cls=BadLayer4, check_cls=SiluAndMul, repo=stub_repo(BadLayer4)) | ||||
|     with pytest.raises(TypeError, match="different kind of arguments"): | ||||
|         _validate_layer(cls=BadLayer4, check_cls=SiluAndMul) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @ -1039,7 +923,7 @@ def test_kernel_modes_cross_fallback(): | ||||
|         assert linear.n_calls == 2 | ||||
|  | ||||
|  | ||||
| def test_layer_versions(device): | ||||
| def test_layer_versions(): | ||||
|     @use_kernel_forward_from_hub("Version") | ||||
|     class Version(nn.Module): | ||||
|         def forward(self) -> str: | ||||
| @ -1050,20 +934,20 @@ def test_layer_versions(device): | ||||
|     with use_kernel_mapping( | ||||
|         { | ||||
|             "Version": { | ||||
|                 Device(type=device): LayerRepository( | ||||
|                 Device(type="cuda"): LayerRepository( | ||||
|                     repo_id="kernels-test/versions", | ||||
|                     layer_name="Version", | ||||
|                 ) | ||||
|             } | ||||
|         } | ||||
|     ): | ||||
|         version = kernelize(version, device=device, mode=Mode.INFERENCE) | ||||
|         version = kernelize(version, device="cuda", mode=Mode.INFERENCE) | ||||
|         assert version() == "0.2.0" | ||||
|  | ||||
|     with use_kernel_mapping( | ||||
|         { | ||||
|             "Version": { | ||||
|                 Device(type=device): LayerRepository( | ||||
|                 Device(type="cuda"): LayerRepository( | ||||
|                     repo_id="kernels-test/versions", | ||||
|                     layer_name="Version", | ||||
|                     version="<1.0.0", | ||||
| @ -1071,13 +955,13 @@ def test_layer_versions(device): | ||||
|             } | ||||
|         } | ||||
|     ): | ||||
|         version = kernelize(version, device=device, mode=Mode.INFERENCE) | ||||
|         version = kernelize(version, device="cuda", mode=Mode.INFERENCE) | ||||
|         assert version() == "0.2.0" | ||||
|  | ||||
|     with use_kernel_mapping( | ||||
|         { | ||||
|             "Version": { | ||||
|                 Device(type=device): LayerRepository( | ||||
|                 Device(type="cuda"): LayerRepository( | ||||
|                     repo_id="kernels-test/versions", | ||||
|                     layer_name="Version", | ||||
|                     version="<0.2.0", | ||||
| @ -1085,13 +969,13 @@ def test_layer_versions(device): | ||||
|             } | ||||
|         } | ||||
|     ): | ||||
|         version = kernelize(version, device=device, mode=Mode.INFERENCE) | ||||
|         version = kernelize(version, device="cuda", mode=Mode.INFERENCE) | ||||
|         assert version() == "0.1.1" | ||||
|  | ||||
|     with use_kernel_mapping( | ||||
|         { | ||||
|             "Version": { | ||||
|                 Device(type=device): LayerRepository( | ||||
|                 Device(type="cuda"): LayerRepository( | ||||
|                     repo_id="kernels-test/versions", | ||||
|                     layer_name="Version", | ||||
|                     version=">0.1.0,<0.2.0", | ||||
| @ -1099,13 +983,13 @@ def test_layer_versions(device): | ||||
|             } | ||||
|         } | ||||
|     ): | ||||
|         version = kernelize(version, device=device, mode=Mode.INFERENCE) | ||||
|         version = kernelize(version, device="cuda", mode=Mode.INFERENCE) | ||||
|         assert version() == "0.1.1" | ||||
|  | ||||
|     with use_kernel_mapping( | ||||
|         { | ||||
|             "Version": { | ||||
|                 Device(type=device): LayerRepository( | ||||
|                 Device(type="cuda"): LayerRepository( | ||||
|                     repo_id="kernels-test/versions", | ||||
|                     layer_name="Version", | ||||
|                     version=">0.2.0", | ||||
| @ -1114,13 +998,13 @@ def test_layer_versions(device): | ||||
|         } | ||||
|     ): | ||||
|         with pytest.raises(ValueError, match=r"No version.*satisfies requirement"): | ||||
|             kernelize(version, device=device, mode=Mode.INFERENCE) | ||||
|             kernelize(version, device="cuda", mode=Mode.INFERENCE) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=r"Either a revision or a version.*not both"): | ||||
|         use_kernel_mapping( | ||||
|             { | ||||
|                 "Version": { | ||||
|                     Device(type=device): LayerRepository( | ||||
|                     Device(type="cuda"): LayerRepository( | ||||
|                         repo_id="kernels-test/versions", | ||||
|                         layer_name="Version", | ||||
|                         revision="v0.1.0", | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	