mirror of
				https://github.com/huggingface/kernels.git
				synced 2025-10-31 19:54:28 +08:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			faq-kernel
			...
			v0.6.1
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 6ab42cbfcc | 
							
								
								
									
										17
									
								
								.github/workflows/build_documentation.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										17
									
								
								.github/workflows/build_documentation.yaml
									
									
									
									
										vendored
									
									
								
							| @ -1,17 +0,0 @@ | ||||
| name: Build documentation | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|       - doc-builder* | ||||
|       - v*-release | ||||
|  | ||||
| jobs: | ||||
|   build: | ||||
|     uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main | ||||
|     with: | ||||
|       commit_sha: ${{ github.sha }} | ||||
|       package: kernels | ||||
|     secrets: | ||||
|       hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} | ||||
							
								
								
									
										15
									
								
								.github/workflows/build_pr_documentation.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								.github/workflows/build_pr_documentation.yaml
									
									
									
									
										vendored
									
									
								
							| @ -1,15 +0,0 @@ | ||||
| name: Build PR Documentation | ||||
|  | ||||
| on: pull_request | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   build: | ||||
|     uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main | ||||
|     with: | ||||
|       commit_sha: ${{ github.event.pull_request.head.sha }} | ||||
|       pr_number: ${{ github.event.number }} | ||||
|       package: kernels | ||||
							
								
								
									
										21
									
								
								.github/workflows/lint.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										21
									
								
								.github/workflows/lint.yml
									
									
									
									
										vendored
									
									
								
							| @ -8,24 +8,3 @@ jobs: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - name: Run ruff | ||||
|         uses: astral-sh/ruff-action@v3 | ||||
|  | ||||
|   black: | ||||
|     name: Run black check | ||||
|     runs-on: ubuntu-latest | ||||
|     env: | ||||
|       UV_PYTHON_PREFERENCE: only-managed | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|  | ||||
|       - name: Install uv and set the python version | ||||
|         uses: astral-sh/setup-uv@v5 | ||||
|         with: | ||||
|           python-version: 3.12 | ||||
|  | ||||
|       - name: Install black | ||||
|         run: uv pip install black | ||||
|  | ||||
|       - name: Check formatting | ||||
|         run: | | ||||
|           uv run black --check src | ||||
|           uv run black --check tests | ||||
|  | ||||
							
								
								
									
										7
									
								
								.github/workflows/test.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								.github/workflows/test.yml
									
									
									
									
										vendored
									
									
								
							| @ -51,10 +51,7 @@ jobs: | ||||
|         run: uv run mypy src/kernels | ||||
|  | ||||
|       - name: Run tests | ||||
|         env: | ||||
|           HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||||
|         run: | | ||||
|           uv run pytest tests | ||||
|         run: uv run pytest tests | ||||
|  | ||||
|       - name: Check kernel conversion | ||||
|         run: | | ||||
| @ -66,7 +63,7 @@ jobs: | ||||
|       - name: Check README generation | ||||
|         # For now, just checks that generation doesn't fail. | ||||
|         run: | | ||||
|           uv run kernels generate-readme kernels-community/triton-layer-norm | ||||
|           uv run kernels generate-readme kernels-community/triton-layer-norm --revision docs | ||||
|  | ||||
|       - name: Import check without torch | ||||
|         run: | | ||||
|  | ||||
							
								
								
									
										16
									
								
								.github/workflows/upload_pr_documentation.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								.github/workflows/upload_pr_documentation.yaml
									
									
									
									
										vendored
									
									
								
							| @ -1,16 +0,0 @@ | ||||
| name: Upload PR Documentation | ||||
|  | ||||
| on: | ||||
|   workflow_run: | ||||
|     workflows: ["Build PR Documentation"] | ||||
|     types: | ||||
|       - completed | ||||
|  | ||||
| jobs: | ||||
|   build: | ||||
|     uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main | ||||
|     with: | ||||
|       package_name: kernels | ||||
|     secrets: | ||||
|       hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} | ||||
|       comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} | ||||
							
								
								
									
										14
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								README.md
									
									
									
									
									
								
							| @ -56,12 +56,10 @@ the Hub. | ||||
|  | ||||
| ## 📚 Documentation | ||||
|  | ||||
| - [Introduction](docs/source/index.md) | ||||
| - [Installation](docs/source/installation.md) | ||||
| - [Basic usage](docs/source/basic-usage.md) | ||||
| - [Using layers](docs/source/layers.md) | ||||
| - [Locking kernel/layer versions](docs/source/locking.md) | ||||
| - [Environment variables](docs/source/env.md) | ||||
| - [Kernel requirements](docs/source/kernel-requirements.md) | ||||
| - [Frequently Asked Questions](docs/source/faq.md) | ||||
| - [Using layers](docs/layers.md) | ||||
| - [Locking kernel versions](docs/locking.md) | ||||
| - [Environment variables](docs/env.md) | ||||
| - [Using kernels in a Docker container](docs/docker.md) | ||||
| - [Kernel requirements](docs/kernel-requirements.md) | ||||
| - [Frequently Asked Questions](docs/faq.md) | ||||
| - [Writing kernels](https://github.com/huggingface/kernel-builder/blob/main/docs/writing-kernels.md) using [kernel-builder](https://github.com/huggingface/kernel-builder/) | ||||
|  | ||||
							
								
								
									
										8
									
								
								docs/docker.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								docs/docker.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,8 @@ | ||||
| # Using kernels in a Docker container | ||||
|  | ||||
| Build and run the reference [examples/basic.py](examples/basic.py) in a Docker container with the following commands: | ||||
|  | ||||
| ```bash | ||||
| docker build --platform linux/amd64 -t kernels-reference -f docker/Dockerfile.reference . | ||||
| docker run --gpus all -it --rm -e HF_TOKEN=$HF_TOKEN kernels-reference | ||||
| ``` | ||||
							
								
								
									
										13
									
								
								docs/faq.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								docs/faq.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | ||||
| # FAQ | ||||
|  | ||||
| ## Why is the kernelization step needed? | ||||
|  | ||||
| In earlier versions of `kernels`, a layer's `forward` was replaced by | ||||
| `use_kernel_forward_from_hub` and `replace_kernel_forward_from_hub`. The | ||||
| new `forward` would dispatch to a kernel based on the device type, | ||||
| whether a model was training, etc. However, this approach was | ||||
| fundamentally incompatible with `torch.compile` since it relied | ||||
| on data-dependent branching. | ||||
|  | ||||
| To avoid branching, we have to make dispatch decisions ahead of time, | ||||
| which is what the `kernelize` function does. | ||||
| @ -34,8 +34,6 @@ Kernels are versioned on the Hub using Git tags. Version tags must be of | ||||
| the form `v<major>.<minor>.<patch>`. Versions are used by [locking](./locking.md) | ||||
| to resolve the version constraints. | ||||
| 
 | ||||
| We recommend using [semver](https://semver.org/) to version kernels. | ||||
| 
 | ||||
| ## Native Python module | ||||
| 
 | ||||
| Kernels will typically contain a native Python module with precompiled | ||||
| @ -52,12 +50,13 @@ have dynamic library dependencies outside: | ||||
|   for compatibility with Python 3.9 and later. | ||||
| - Compatible with [`manylinux_2_28`](https://github.com/pypa/manylinux?tab=readme-ov-file#manylinux_2_28-almalinux-8-based). | ||||
|   This means that the extension **must not** use symbols versions higher than: | ||||
| 
 | ||||
|   - GLIBC 2.28 | ||||
|   - GLIBCXX 3.4.24 | ||||
|   - CXXABI 1.3.11 | ||||
|   - GCC 7.0.0 | ||||
| 
 | ||||
| These requirements can be checked with the ABI checker (see below). | ||||
| These requirements can be checked with the ABI checker (see below). | ||||
| 
 | ||||
| ### macOS | ||||
| 
 | ||||
							
								
								
									
										134
									
								
								docs/layers.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								docs/layers.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,134 @@ | ||||
| # Layers | ||||
|  | ||||
| A kernel can provide layers in addition to kernel functions. A layer from | ||||
| the Hub can replace the `forward` method of an existing layer for a certain | ||||
| device type. This makes it possible to provide more performant kernels for | ||||
| existing layers. | ||||
|  | ||||
| See [Kernel requirements](kernel-requirements.md) for more information on the | ||||
| requirements of Hub layers. | ||||
|  | ||||
| ## Making a layer extensible with kernels from the hub | ||||
|  | ||||
| ### Using a decorator | ||||
|  | ||||
| A layer can be made extensible with the `use_kernel_forward_from_hub` | ||||
| decorator. For example: | ||||
|  | ||||
| ```python | ||||
| @use_kernel_forward_from_hub("SiluAndMul") | ||||
| class SiluAndMul(nn.Module): | ||||
|     def forward(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         d = input.shape[-1] // 2 | ||||
|         return F.silu(input[..., :d]) * input[..., d:] | ||||
| ``` | ||||
|  | ||||
| The decorator does not change the behavior of the class -- it annotates | ||||
| the class with the given name (here `SiluAndMul`). The `kernelize` function | ||||
| described below uses this name to look up kernels for the layer. | ||||
|  | ||||
| ### External layers | ||||
|  | ||||
| An existing layer that does not (yet) have the `use_kernel_forward_from_hub` | ||||
| decorator can be made extensible using the `replace_kernel_forward_from_hub` | ||||
| function: | ||||
|  | ||||
| ```python | ||||
| from somelibrary import SiluAndMul | ||||
|  | ||||
| replace_kernel_forward_from_hub(SiluAndMul, "SiluAndMul") | ||||
| ``` | ||||
|  | ||||
| **Warning:** we strongly recommend using layers with a decorator, since | ||||
| it signifies that the maintainer intends to keep the `forward` signature | ||||
| compatible with layers from the hub. | ||||
|  | ||||
| ## Kernelizing a model | ||||
|  | ||||
| A model will not use Hub kernels by default, even if it contains extensible | ||||
| layers. To enable the use of Hub kernels in the model, it needs to be | ||||
| 'kernelized' using the `kernelize` function. This function traverses the | ||||
| model graph and replaces the `forward` methods of extensible layers for which | ||||
| Hub kernels are registered. `kernelize` can be used as follows: | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
| model = kernelize(model) | ||||
| ``` | ||||
|  | ||||
| **Note:** the `kernelize` function modifies the model in-place, the model | ||||
| itself is returned as a convenience. | ||||
|  | ||||
| ### Kernel device | ||||
|  | ||||
| Kernels can be registered per device type. For instance, separate `cuda` and | ||||
| `metal` kernels could be registered for the name `SiluAndMul`. By default, | ||||
| `kernelize` will try to infer the device type from the model's parameters. | ||||
| You can pass the device type to `kernelize` if the device type cannot be | ||||
| inferred (e.g. because the model has no parameters): | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
| model = kernelize(model, device="cuda") | ||||
| ``` | ||||
|  | ||||
| ### `torch.compile` | ||||
|  | ||||
| Not all Hub kernels support `torch.compile`. If you want to compile a model | ||||
| after kernelizing it, pass the `needs_torch_compile` argument to ensure that | ||||
| only kernels that support `torch.compile` will be loaded: | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
| model = kernelize(model, needs_torch_compile=True) | ||||
| ``` | ||||
|  | ||||
| ### Fallback forward | ||||
|  | ||||
| The `needs_torch_compile` argument will fall back to the layer's original | ||||
| `forward` if the registered kernel does not support `torch.compile`. You | ||||
| can let `kernelize` raise an exception instead by using `use_fallback=False`: | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
| model = kernelize(model, needs_torch_compile=True, use_fallback=False) | ||||
| ``` | ||||
|  | ||||
| This can be useful if you want to guarantee that Hub kernels are used. | ||||
|  | ||||
| ## Registering a hub kernel for a layer | ||||
|  | ||||
| `kernelize` relies on kernel mappings to find Hub kernels for layers. | ||||
| Kernel mappings map a kernel name such as `SiluAndMul` to a kernel on | ||||
| the Hub. For example: | ||||
|  | ||||
| ```python | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         "cuda": LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|             revision="layers", | ||||
|         ) | ||||
|     } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| You can register such a mapping using `register_kernel_mapping`: | ||||
|  | ||||
| ```python | ||||
| register_kernel_mapping(kernel_layer_mapping) | ||||
| ``` | ||||
|  | ||||
| This will register the kernel mapping in the current context, which is | ||||
| normally global. It is recommended to scope the mapping to where it is | ||||
| used with the `use_kernel_mapping` context manager: | ||||
|  | ||||
| ```python | ||||
| with use_kernel_mapping(kernel_layer_mapping): | ||||
|     # Use the layer for which the mapping is applied. | ||||
|     model = kernelize(model) | ||||
| ``` | ||||
|  | ||||
| This ensures that the mapping is not active anymore outside the | ||||
| `with`-scope. | ||||
| @ -1,4 +1,4 @@ | ||||
| # Locking kernel/layer versions | ||||
| # Locking kernel versions | ||||
| 
 | ||||
| Projects that use `setuptools` can lock the kernel versions that should be | ||||
| used. First specify the accepted versions in `pyproject.toml` and make | ||||
| @ -26,24 +26,6 @@ activation = get_locked_kernel("kernels-community/activation") | ||||
| **Note:** the lock file is included in the package metadata, so it will only be visible | ||||
| to `kernels` after doing an (editable or regular) installation of your project. | ||||
| 
 | ||||
| ## Locked kernel layers | ||||
| 
 | ||||
| Locking is also supported for kernel layers. To use locked layers, register them | ||||
| with the `LockedLayerRepository` class: | ||||
| 
 | ||||
| ```python | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         "cuda": LockedLayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|         ) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| register_kernel_mapping(kernel_layer_mapping) | ||||
| ``` | ||||
| 
 | ||||
| ## Pre-downloading locked kernels | ||||
| 
 | ||||
| Locked kernels can be pre-downloaded by running `kernels download .` in your | ||||
| @ -1,30 +0,0 @@ | ||||
| - sections: | ||||
|     - local: index | ||||
|       title: Introduction | ||||
|     - local: installation | ||||
|       title: Installation | ||||
|   title: Getting started | ||||
| - sections: | ||||
|     - local: basic-usage | ||||
|       title: Basic Usage | ||||
|     - local: layers | ||||
|       title: Using Layers | ||||
|     - local: locking | ||||
|       title: Locking Kernel Versions | ||||
|     - local: env | ||||
|       title: Environment Variables | ||||
|     - local: faq | ||||
|       title: FAQ | ||||
|   title: Usage Guide | ||||
| - sections: | ||||
|     - local: api/kernels | ||||
|       title: Kernels | ||||
|     - local: api/layers | ||||
|       title: Layers | ||||
|     - local: cli | ||||
|       title: Kernels CLI | ||||
|   title: API Reference | ||||
| - sections: | ||||
|     - local: kernel-requirements | ||||
|       title: Kernel Requirements | ||||
|   title: Developer Guide | ||||
| @ -1,21 +0,0 @@ | ||||
| # Kernels API Reference | ||||
|  | ||||
| ## Main Functions | ||||
|  | ||||
| ### get_kernel | ||||
|  | ||||
| [[autodoc]] kernels.get_kernel | ||||
|  | ||||
| ### has_kernel | ||||
|  | ||||
| [[autodoc]] kernels.has_kernel | ||||
|  | ||||
| ## Loading locked kernels | ||||
|  | ||||
| ### load_kernel | ||||
|  | ||||
| [[autodoc]] kernels.load_kernel | ||||
|  | ||||
| ### get_locked_kernel | ||||
|  | ||||
| [[autodoc]] kernels.get_locked_kernel | ||||
| @ -1,41 +0,0 @@ | ||||
| # Layers API Reference | ||||
|  | ||||
| ## Making layers kernel-aware | ||||
|  | ||||
| ### use_kernel_forward_from_hub | ||||
|  | ||||
| [[autodoc]] kernels.use_kernel_forward_from_hub | ||||
|  | ||||
| ### replace_kernel_forward_from_hub | ||||
|  | ||||
| [[autodoc]] kernels.replace_kernel_forward_from_hub | ||||
|  | ||||
| ## Registering kernel mappings | ||||
|  | ||||
| ### use_kernel_mapping | ||||
|  | ||||
| [[autodoc]] kernels.use_kernel_mapping | ||||
|  | ||||
| ### register_kernel_mapping | ||||
|  | ||||
| [[autodoc]] kernels.register_kernel_mapping | ||||
|  | ||||
| ## Kernelizing a model | ||||
|  | ||||
| ### kernelize | ||||
|  | ||||
| [[autodoc]] kernels.kernelize | ||||
|  | ||||
| ## Classes | ||||
|  | ||||
| ### Device | ||||
|  | ||||
| [[autodoc]] kernels.Device | ||||
|  | ||||
| ### Mode | ||||
|  | ||||
| [[autodoc]] kernels.Mode | ||||
|  | ||||
| ### LayerRepository | ||||
|  | ||||
| [[autodoc]] kernels.LayerRepository | ||||
| @ -1,50 +0,0 @@ | ||||
| # Basic Usage | ||||
|  | ||||
| ## Loading Kernels | ||||
|  | ||||
| Here is how you would use the [activation](https://huggingface.co/kernels-community/activation) kernels from the Hugging Face Hub: | ||||
|  | ||||
| ```python | ||||
| import torch | ||||
| from kernels import get_kernel | ||||
|  | ||||
| # Download optimized kernels from the Hugging Face hub | ||||
| activation = get_kernel("kernels-community/activation") | ||||
|  | ||||
| # Create a random tensor | ||||
| x = torch.randn((10, 10), dtype=torch.float16, device="cuda") | ||||
|  | ||||
| # Run the kernel | ||||
| y = torch.empty_like(x) | ||||
| activation.gelu_fast(y, x) | ||||
|  | ||||
| print(y) | ||||
| ``` | ||||
|  | ||||
| ### Using version bounds | ||||
|  | ||||
| Kernels are versioned using tags of the form `v<major>.<minor>.<patch>`. | ||||
| You can specify which version to download using Python version specifiers: | ||||
|  | ||||
| ```python | ||||
| import torch | ||||
| from kernels import get_kernel | ||||
|  | ||||
| activation = get_kernel("kernels-community/activation", version=">=0.0.4,<0.1.0") | ||||
| ``` | ||||
|  | ||||
| This will get the latest kernel tagged `v0.0.z` where `z` is at least 4. It | ||||
| is strongly recommended to specify a version bound, since a kernel author | ||||
| might push incompatible changes to the `main` branch. | ||||
|  | ||||
| ## Checking Kernel Availability | ||||
|  | ||||
| You can check if a specific kernel is available for your environment: | ||||
|  | ||||
| ```python | ||||
| from kernels import has_kernel | ||||
|  | ||||
| # Check if kernel is available for current environment | ||||
| is_available = has_kernel("kernels-community/activation") | ||||
| print(f"Kernel available: {is_available}") | ||||
| ``` | ||||
| @ -1,41 +0,0 @@ | ||||
| # Kernels CLI Reference | ||||
|  | ||||
| ## Main Functions | ||||
|  | ||||
| ### kernels to-wheel | ||||
|  | ||||
| We strongly recommend downloading kernels from the Hub using the `kernels` | ||||
| package, since this comes with large [benefits](index.md) over using Python | ||||
| wheels. That said, some projects may require deployment of kernels as | ||||
| wheels. The `kernels` utility provides a simple solution to this. You can | ||||
| convert any Hub kernel into a set of wheels with the `to-wheel` command: | ||||
|  | ||||
| ```bash | ||||
| $ kernels to-wheel drbh/img2grey 1.1.2 | ||||
| ☸ img2grey-1.1.2+torch27cu128cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu124cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu126cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch27cu126cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu126cxx98-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch27cu128cxx11-cp39-abi3-manylinux_2_28_aarch64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu126cxx98-cp39-abi3-manylinux_2_28_aarch64.whl | ||||
| ☸ img2grey-1.1.2+torch27cu126cxx11-cp39-abi3-manylinux_2_28_aarch64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu126cxx11-cp39-abi3-manylinux_2_28_aarch64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu118cxx98-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu124cxx98-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch26cu118cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ☸ img2grey-1.1.2+torch27cu118cxx11-cp39-abi3-manylinux_2_28_x86_64.whl | ||||
| ``` | ||||
|  | ||||
| ### kernels upload | ||||
|  | ||||
| Use `kernels upload <dir_containing_build> --repo_id="hub-username/kernel"` to upload | ||||
| your kernel builds to the Hub. | ||||
|  | ||||
| **Notes**: | ||||
|  | ||||
| - This will take care of creating a repository on the Hub with the `repo_id` provided. | ||||
| - If a repo with the `repo_id` already exists and if it contains a `build` with the build variant | ||||
|   being uploaded, it will attempt to delete the files existing under it. | ||||
| - Make sure to be authenticated (run `hf auth login` if not) to be able to perform uploads to the Hub. | ||||
|  | ||||
| @ -1,41 +0,0 @@ | ||||
| # FAQ | ||||
|  | ||||
| ## Kernel layers | ||||
|  | ||||
| ### Why is the kernelization step needed as a separate step? | ||||
|  | ||||
| In earlier versions of `kernels`, a layer's `forward` method was replaced | ||||
| by `use_kernel_forward_from_hub` and `replace_kernel_forward_from_hub`. | ||||
| The new `forward` would dispatch to a kernel based on the device type, | ||||
| whether a model was training, etc. However, this approach was | ||||
| fundamentally incompatible with `torch.compile` since it relied | ||||
| on data-dependent branching. | ||||
|  | ||||
| To avoid branching, we have to make dispatch decisions ahead of time, | ||||
| which is what the `kernelize` function does. | ||||
|  | ||||
| ### Why does kernelization only replace `forward` methods? | ||||
|  | ||||
| There are some other possible approaches. The first is to completely | ||||
| replace existing layers by kernel layers. However, since this would | ||||
| permit free-form layer classes, it would be much harder to validate | ||||
| that layers are fully compatible with the layers that they are | ||||
| replacing. For instance, they could have completely different member | ||||
| variables. Besides that, we would also need to hold on to the original | ||||
| layers, in case we need to revert to the base layers when the model | ||||
| is `kernelize`d again with different options. | ||||
|  | ||||
| A second approach would be to make an auxiliary layer that wraps the | ||||
| original layer and the kernel layer and dispatches to the kernel layer. | ||||
| This wouldn't have the issues of the first approach, because kernel layers | ||||
| could be similarly strict as they are now, and we would still have access | ||||
| to the original layers when `kernelize`-ing the model again. However, | ||||
| this would change the graph structure of the model and would break use | ||||
| cases where programs access the model internals (e.g. | ||||
| `model.layers[0].attention.query_weight`) or rely on the graph structure | ||||
| in other ways. | ||||
|  | ||||
| The approach of `forward`-replacement is the least invasive, because | ||||
| it preserves the original model graph. It is also reversible, since | ||||
| even though the `forward` of a layer _instance_ might be replaced, | ||||
| the corresponding class still has the original `forward`. | ||||
| @ -1,20 +0,0 @@ | ||||
| # Kernels | ||||
|  | ||||
| <div align="center"> | ||||
| <img src="https://github.com/user-attachments/assets/64a652f3-0cd3-4829-b3c1-df13f7933569" width="450" height="450" alt="kernel-builder logo"> | ||||
| </div> | ||||
|  | ||||
| The Kernel Hub allows Python libraries and applications to load compute | ||||
| kernels directly from the [Hub](https://hf.co/). To support this kind | ||||
| of dynamic loading, Hub kernels differ from traditional Python kernel | ||||
| packages in that they are made to be: | ||||
|  | ||||
| - **Portable**: a kernel can be loaded from paths outside `PYTHONPATH`. | ||||
| - **Unique**: multiple versions of the same kernel can be loaded in the | ||||
|   same Python process. | ||||
| - **Compatible**: kernels must support all recent versions of Python and | ||||
|   the different PyTorch build configurations (various CUDA versions | ||||
|   and C++ ABIs). Furthermore, older C library versions must be supported. | ||||
|  | ||||
| You can [search for kernels](https://huggingface.co/models?other=kernel) on | ||||
| the Hub. | ||||
| @ -1,16 +0,0 @@ | ||||
| # Installation | ||||
|  | ||||
| Install the `kernels` package with `pip` (requires `torch>=2.5` and CUDA): | ||||
|  | ||||
| ```bash | ||||
| pip install kernels | ||||
| ``` | ||||
|  | ||||
| # Using kernels in a Docker container | ||||
|  | ||||
| Build and run the reference `examples/basic.py` in a Docker container with the following commands: | ||||
|  | ||||
| ```bash | ||||
| docker build --platform linux/amd64 -t kernels-reference -f docker/Dockerfile.reference . | ||||
| docker run --gpus all -it --rm -e HF_TOKEN=$HF_TOKEN kernels-reference | ||||
| ``` | ||||
| @ -1,323 +0,0 @@ | ||||
| # Layers | ||||
|  | ||||
| A kernel can provide layers in addition to kernel functions. A layer from | ||||
| the Hub can replace the `forward` method of an existing layer for a certain | ||||
| device type. This makes it possible to provide more performant kernels for | ||||
| existing layers. | ||||
|  | ||||
| See [Kernel requirements](kernel-requirements.md) for more information on the | ||||
| requirements of Hub layers. | ||||
|  | ||||
| ## Making a layer extensible with kernels from the hub | ||||
|  | ||||
| ### Using a decorator | ||||
|  | ||||
| A layer can be made extensible with the `use_kernel_forward_from_hub` | ||||
| decorator. For example: | ||||
|  | ||||
| ```python | ||||
| @use_kernel_forward_from_hub("SiluAndMul") | ||||
| class SiluAndMul(nn.Module): | ||||
|     def forward(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         d = input.shape[-1] // 2 | ||||
|         return F.silu(input[..., :d]) * input[..., d:] | ||||
| ``` | ||||
|  | ||||
| The decorator does not change the behavior of the class -- it annotates | ||||
| the class with the given name (here `SiluAndMul`). The `kernelize` function | ||||
| described below uses this name to look up kernels for the layer. | ||||
|  | ||||
| ### External layers | ||||
|  | ||||
| An existing layer that does not (yet) have the `use_kernel_forward_from_hub` | ||||
| decorator can be made extensible using the `replace_kernel_forward_from_hub` | ||||
| function: | ||||
|  | ||||
| ```python | ||||
| from somelibrary import SiluAndMul | ||||
|  | ||||
| replace_kernel_forward_from_hub(SiluAndMul, "SiluAndMul") | ||||
| ``` | ||||
|  | ||||
| **Warning:** we strongly recommend using layers with a decorator, since | ||||
| it signifies that the maintainer intends to keep the `forward` signature | ||||
| compatible with layers from the hub. | ||||
|  | ||||
| ## Kernelizing a model | ||||
|  | ||||
| A model will not use Hub kernels by default, even if it contains extensible | ||||
| layers. To enable the use of Hub kernels in the model, it needs to be | ||||
| 'kernelized' using the `kernelize` function. This function traverses the | ||||
| model graph and replaces the `forward` methods of extensible layers for which | ||||
| Hub kernels are registered. `kernelize` can be used as follows: | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
| model = kernelize(model, mode=Mode.INFERENCE) | ||||
| ``` | ||||
|  | ||||
| The `kernelize` function modifies the model in-place, the model itself is | ||||
| returned as a convenience. The `mode` specifies that the model will be used | ||||
| in inference. Similarly, you can ask `kernelize` to prepare the model for | ||||
| training: | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
| model = kernelize(model, mode=Mode.TRAINING) | ||||
| ``` | ||||
|  | ||||
| A model that is kernelized for training can also be used for inference, but | ||||
| not the other way around. If you want to change the mode of the kernelized | ||||
| model, you can just run `kernelize` on the model again with the new mode. | ||||
|  | ||||
| If you want to compile a model with `torch.compile`, this should be indicated | ||||
| in the mode as well. You can do this by combining `Mode.INFERENCE` or | ||||
| `Mode.TRAINING` with `Mode.TORCH_COMPILE` using the set union (`|`) operator: | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
|  | ||||
| # Inference | ||||
| model = kernelize(model, mode=Mode.INFERENCE | Mode.TORCH_COMPILE) | ||||
|  | ||||
| # Training | ||||
| model = kernelize(model, mode=Mode.TRAINING | Mode.TORCH_COMPILE) | ||||
| ``` | ||||
|  | ||||
| ### Kernel device | ||||
|  | ||||
| Kernels can be registered per device type. For instance, separate `cuda` and | ||||
| `metal` kernels could be registered for the name `SiluAndMul`. By default, | ||||
| `kernelize` will try to infer the device type from the model's parameters. | ||||
| You can pass the device type to `kernelize` if the device type cannot be | ||||
| inferred (e.g. because the model has no parameters): | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
| model = kernelize(model, device="cuda", mode=Mode.INFERENCE) | ||||
| ``` | ||||
|  | ||||
| ### Fallback `forward` | ||||
|  | ||||
| If the `TRAINING` and/or `TORCH_COMPILE` modes are used, but a registered | ||||
| kernel does not support backward passes or `torch.compile` respectively, | ||||
| `kernelize` will fall back to the original, non-kernelized, layer. You | ||||
| can let `kernelize` raise an exception instead by using `use_fallback=False`: | ||||
|  | ||||
| ```python | ||||
| model = MyModel(...) | ||||
| model = kernelize(model, mode=Mode.INFERENCE | Mode.TORCH_COMPILE, use_fallback=False) | ||||
| ``` | ||||
|  | ||||
| This can be useful if you want to guarantee that Hub kernels are used. | ||||
|  | ||||
| ### Inspecting which kernels are used | ||||
|  | ||||
| The kernels that are used are logged at the `INFO` level by `kernelize`. | ||||
| See the [Python logging](https://docs.python.org/3/library/logging.html) | ||||
| documentation for information on how to configure logging. | ||||
|  | ||||
| ## Registering a hub kernel for a layer | ||||
|  | ||||
| `kernelize` relies on kernel mappings to find Hub kernels for layers. | ||||
| Kernel mappings map a kernel name such as `SiluAndMul` to a kernel on | ||||
| the Hub. For example: | ||||
|  | ||||
| ```python | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         "cuda": LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|         ), | ||||
|         "rocm": LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|         ) | ||||
|     } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| You can register such a mapping using `register_kernel_mapping`: | ||||
|  | ||||
| ```python | ||||
| register_kernel_mapping(kernel_layer_mapping) | ||||
| ``` | ||||
|  | ||||
| This will register the kernel mapping in the current context, which is | ||||
| normally global. It is recommended to scope the mapping to where it is | ||||
| used with the `use_kernel_mapping` context manager: | ||||
|  | ||||
| ```python | ||||
| with use_kernel_mapping(kernel_layer_mapping): | ||||
|     # Use the layer for which the mapping is applied. | ||||
|     model = kernelize(model, mode=Mode.TRAINING | Mode.TORCH_COMPILE) | ||||
| ``` | ||||
|  | ||||
| This ensures that the mapping is not active anymore outside the | ||||
| `with`-scope. | ||||
|  | ||||
| ### Using version bounds | ||||
|  | ||||
| Kernels are versioned using tags of the form `v<major>.<minor>.<patch>`. | ||||
| You can specify which version of the kernel to download using Python version | ||||
| specifiers: | ||||
|  | ||||
| ```python | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         "cuda": LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|             version=">=0.0.4,<0.1.0", | ||||
|         ), | ||||
|         "rocm": LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|             version=">=0.0.4,<0.1.0", | ||||
|         ) | ||||
|     } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| This will get the layer from the latest kernel tagged `v0.0.z` where `z` is at | ||||
| least 4. It is strongly recommended to specify a version bound, since a | ||||
| kernel author might push incompatible changes to the `main` branch. | ||||
|  | ||||
| ### Registering kernels for specific modes | ||||
|  | ||||
| You might want to register two different kernels for a particular layer, | ||||
| where one kernel is optimized for a specific mode. You can do so by | ||||
| registering layer repositories for specific modes. For example: | ||||
|  | ||||
| ```python | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         "cuda": { | ||||
|           Mode.INFERENCE: LayerRepository( | ||||
|               repo_id="kernels-community/activation-inference-optimized", | ||||
|               layer_name="SiluAndMul", | ||||
|           ), | ||||
|           Mode.TRAINING | Mode.TORCH_COMPILE: LayerRepository( | ||||
|               repo_id="kernels-community/activation-training-optimized", | ||||
|               layer_name="SiluAndMul", | ||||
|           ), | ||||
|       } | ||||
|     } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| The `kernelize` function will attempt to use the following registered | ||||
| kernels for a given mode: | ||||
|  | ||||
| - `INFERENCE`: `INFERENCE` → `INFERENCE | TORCH_COMPILE` → `TRAINING` → | ||||
|   `TRAINING | TORCH_COMPILE` → `FALLBACK` | ||||
| - `INFERENCE | TORCH_COMPILE`: `INFERENCE | TORCH_COMPILE` → | ||||
|   `TRAINING | TORCH_COMPILE` → `FALLBACK` | ||||
| - `TRAINING`: `TRAINING` → `TRAINING | TORCH_COMPILE` → `FALLBACK` | ||||
| - `TRAINING | TORCH_COMPILE`: `TRAINING | TORCH_COMPILE` → `FALLBACK` | ||||
|  | ||||
| `Mode.FALLBACK` is a special mode that is used when no other mode matches. It | ||||
| is also used when a kernel is registered without a mode, as described in the | ||||
| previous section. | ||||
|  | ||||
| ```python | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         "cuda": { | ||||
|             Mode.FALLBACK: LayerRepository( | ||||
|                 repo_id="kernels-community/activation", | ||||
|                 layer_name="SiluAndMul", | ||||
|             ), | ||||
|             Mode.INFERENCE: LayerRepository( | ||||
|                 repo_id="kernels-community/activation-inference-optimized", | ||||
|                 layer_name="SiluAndMul", | ||||
|             ), | ||||
|             Mode.TRAINING: LayerRepository( | ||||
|                 repo_id="kernels-community/activation-training-optimized", | ||||
|                 layer_name="SiluAndMul", | ||||
|             ), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| In this case, both `Mode.INFERENCE | Mode.TORCH_COMPILE` and | ||||
| `Mode.TRAINING | Mode.TORCH_COMPILE` will use the `Mode.FALLBACK` kernel, | ||||
| since the other kernels do not support `torch.compile`. | ||||
|  | ||||
| ### Registering kernels for specific CUDA capabilities | ||||
|  | ||||
| Some kernels only work with newer CUDA architectures. For instance, some | ||||
| kernels require capability 9.0 for the TMA unit on Hopper GPUs. `kernels` | ||||
| supports registering layers for a range of CUDA capabilities. To do so, | ||||
| you need to register the layer for a `Device` with type `cuda` and | ||||
| set the supported range of CUDA capabilities using `CUDAProperties`: | ||||
|  | ||||
| ```python | ||||
| kernel_layer_mapping = { | ||||
|     "SiluAndMul": { | ||||
|         Device( | ||||
|             type="cuda", | ||||
|             properties=CUDAProperties( | ||||
|                 min_capability=75, max_capability=89 | ||||
|             ), | ||||
|         ): LayerRepository( | ||||
|             repo_id="kernels-community/activation", | ||||
|             layer_name="SiluAndMul", | ||||
|         ), | ||||
|         Device( | ||||
|             type="cuda", | ||||
|             properties=CUDAProperties( | ||||
|                 min_capability=90, max_capability=sys.maxsize | ||||
|             ), | ||||
|         ): LayerRepository( | ||||
|             repo_id="kernels-community/activation-hopper", | ||||
|             layer_name="SiluAndMul", | ||||
|         ), | ||||
|     } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Capabilities behave as follows: | ||||
|  | ||||
| - The minimum and maximum capabilities are inclusive. | ||||
| - When a new kernel is registered with the same min/max capabilities as | ||||
|   an existing kernel, the new kernel will replace the old kernel. | ||||
| - When there are multiple kernels that support a capability, the kernel | ||||
|   with the smaller capability interval will be used. E.g. given: | ||||
|   - `KernelA` with `min_capability=80` and `max_capability=89`; | ||||
|   - `KernelB` with `min_capability=75` and `max_capability=89`; | ||||
|   - `kernelize` runs on a system with capability 8.6. | ||||
|  | ||||
|   Then `KernelA` will be used because the interval 80..89 is smaller | ||||
|   than 75..89. The motivation is that kernels with smaller ranges | ||||
|   tend to be more optimized for a specific set of GPUs. **This behavior | ||||
|   might still change in the future.** | ||||
|  | ||||
| ### Registering kernels for specific ROCm capabilities | ||||
|  | ||||
| Registering kernels for the ROCm architecture follows the exact same | ||||
| pattern as CUDA kernels, using `min_capability` and `max_capability` to restrict | ||||
| a kernel to a range of ROCm capabilities. | ||||
|  | ||||
| ### Loading from a local repository for testing | ||||
|  | ||||
| The `LocalLayerRepository` class is provided to load a repository from | ||||
| a local directory. For example: | ||||
|  | ||||
| ```python | ||||
| with use_kernel_mapping( | ||||
|     { | ||||
|         "SiluAndMul": { | ||||
|             "cuda": LocalLayerRepository( | ||||
|                 repo_path="/home/daniel/kernels/activation", | ||||
|                 package_name="activation", | ||||
|                 layer_name="SiluAndMul", | ||||
|             ) | ||||
|         } | ||||
|     }, | ||||
|     inherit_mapping=False, | ||||
| ): | ||||
|     kernelize(linear, mode=Mode.INFERENCE) | ||||
| ``` | ||||
							
								
								
									
										19
									
								
								flake.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										19
									
								
								flake.lock
									
									
									
										generated
									
									
									
								
							| @ -58,32 +58,33 @@ | ||||
|         "nixpkgs": "nixpkgs" | ||||
|       }, | ||||
|       "locked": { | ||||
|         "lastModified": 1754038838, | ||||
|         "narHash": "sha256-oHigCT4z0ayyLyEuxdZooSXRAZP8lfOkZHzY1lx1U50=", | ||||
|         "lastModified": 1749025620, | ||||
|         "narHash": "sha256-V/r5KOp8FRC5n3MINDzTeS3pZz57SasFVzx12WQRQ8U=", | ||||
|         "owner": "huggingface", | ||||
|         "repo": "hf-nix", | ||||
|         "rev": "336f781fa284e193baa3d4c3ce3f95fb34e9ffad", | ||||
|         "rev": "7ab84ffad440c530162f528a96fa062530a6c8e4", | ||||
|         "type": "github" | ||||
|       }, | ||||
|       "original": { | ||||
|         "owner": "huggingface", | ||||
|         "ref": "torch-cxx11", | ||||
|         "repo": "hf-nix", | ||||
|         "type": "github" | ||||
|       } | ||||
|     }, | ||||
|     "nixpkgs": { | ||||
|       "locked": { | ||||
|         "lastModified": 1752785354, | ||||
|         "narHash": "sha256-Y33ryUz7MPqKrZwlbQcsYCUz2jAJCacRf8jbs0tYUlA=", | ||||
|         "owner": "nixos", | ||||
|         "lastModified": 1747820358, | ||||
|         "narHash": "sha256-fTqsZsUX6M3yeEvgyQvXcbGmT2CaRVyVwsi8eK29Oj4=", | ||||
|         "owner": "danieldk", | ||||
|         "repo": "nixpkgs", | ||||
|         "rev": "d38025438a6ee456758dc03188ca6873a415463b", | ||||
|         "rev": "d3c1681180717528068082103bf323147de6ab0b", | ||||
|         "type": "github" | ||||
|       }, | ||||
|       "original": { | ||||
|         "owner": "nixos", | ||||
|         "owner": "danieldk", | ||||
|         "ref": "cudatoolkit-12.9-kernel-builder", | ||||
|         "repo": "nixpkgs", | ||||
|         "rev": "d38025438a6ee456758dc03188ca6873a415463b", | ||||
|         "type": "github" | ||||
|       } | ||||
|     }, | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| { | ||||
|   inputs = { | ||||
|     hf-nix.url = "github:huggingface/hf-nix"; | ||||
|     hf-nix.url = "github:huggingface/hf-nix/torch-cxx11"; | ||||
|     nixpkgs.follows = "hf-nix/nixpkgs"; | ||||
|     flake-utils.url = "github:numtide/flake-utils"; | ||||
|   }; | ||||
| @ -16,7 +16,7 @@ | ||||
|       let | ||||
|         pkgs = import nixpkgs { | ||||
|           inherit system; | ||||
|           config = hf-nix.lib.config system; | ||||
|           inherit (hf-nix.lib) config; | ||||
|           overlays = [ | ||||
|             hf-nix.overlays.default | ||||
|           ]; | ||||
| @ -26,10 +26,6 @@ | ||||
|         formatter = pkgs.nixfmt-tree; | ||||
|         devShells = with pkgs; rec { | ||||
|           default = mkShell { | ||||
|             nativeBuildInputs = [ | ||||
|               # For hf-doc-builder. | ||||
|               nodejs | ||||
|             ]; | ||||
|             buildInputs = | ||||
|               [ | ||||
|                 black | ||||
| @ -40,7 +36,6 @@ | ||||
|               ++ (with python3.pkgs; [ | ||||
|                 docutils | ||||
|                 huggingface-hub | ||||
|                 mktestdocs | ||||
|                 pytest | ||||
|                 pytest-benchmark | ||||
|                 pyyaml | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| [project] | ||||
| name = "kernels" | ||||
| version = "0.10.1.dev0" | ||||
| version = "0.6.1" | ||||
| description = "Download compute kernels" | ||||
| authors = [ | ||||
|   { name = "OlivierDehaene", email = "olivier@huggingface.co" }, | ||||
| @ -24,20 +24,16 @@ build-backend = "setuptools.build_meta" | ||||
|  | ||||
| [dependency-groups] | ||||
| dev = [ | ||||
|   "mktestdocs>=0.2.5", | ||||
|   "mypy>=1.15.0", | ||||
|   "pytest>=8", | ||||
|   "mypy == 1.14.1", | ||||
|   "pytest >=8", | ||||
|   # Whatever version is compatible with pytest. | ||||
|   "pytest-benchmark", | ||||
|   "torch>=2.5", | ||||
|   "torch >=2.5", | ||||
|   "types-pyyaml" | ||||
| ] | ||||
|  | ||||
| [project.optional-dependencies] | ||||
| torch = ["torch"] | ||||
| docs = [ | ||||
|   "hf-doc-builder", | ||||
| ] | ||||
|  | ||||
| [project.scripts] | ||||
| kernels = "kernels.cli:main" | ||||
|  | ||||
| @ -1,7 +1,4 @@ | ||||
| [pytest] | ||||
| markers = | ||||
|     cuda_only: marks tests that should only run on hosts with CUDA GPUs | ||||
|     rocm_only: marks tests that should only run on hosts with ROCm GPUs | ||||
|     darwin_only: marks tests that should only run on macOS | ||||
|     xpu_only: marks tests that should only run on hosts with Intel XPUs | ||||
|     token: enable tests that require a write token | ||||
|     linux_only: marks tests that should only run on Linux | ||||
| @ -1,14 +1,6 @@ | ||||
| import importlib.metadata | ||||
|  | ||||
| __version__ = importlib.metadata.version("kernels") | ||||
|  | ||||
| from kernels.layer import ( | ||||
|     CUDAProperties, | ||||
|     Device, | ||||
|     LayerRepository, | ||||
|     LocalLayerRepository, | ||||
|     LockedLayerRepository, | ||||
|     Mode, | ||||
|     kernelize, | ||||
|     register_kernel_mapping, | ||||
|     replace_kernel_forward_from_hub, | ||||
| @ -17,7 +9,6 @@ from kernels.layer import ( | ||||
| ) | ||||
| from kernels.utils import ( | ||||
|     get_kernel, | ||||
|     get_local_kernel, | ||||
|     get_locked_kernel, | ||||
|     has_kernel, | ||||
|     install_kernel, | ||||
| @ -25,22 +16,16 @@ from kernels.utils import ( | ||||
| ) | ||||
|  | ||||
| __all__ = [ | ||||
|     "__version__", | ||||
|     "CUDAProperties", | ||||
|     "Device", | ||||
|     "LayerRepository", | ||||
|     "LocalLayerRepository", | ||||
|     "LockedLayerRepository", | ||||
|     "Mode", | ||||
|     "get_kernel", | ||||
|     "get_local_kernel", | ||||
|     "get_locked_kernel", | ||||
|     "has_kernel", | ||||
|     "install_kernel", | ||||
|     "kernelize", | ||||
|     "load_kernel", | ||||
|     "register_kernel_mapping", | ||||
|     "replace_kernel_forward_from_hub", | ||||
|     "install_kernel", | ||||
|     "use_kernel_forward_from_hub", | ||||
|     "use_kernel_mapping", | ||||
|     "register_kernel_mapping", | ||||
|     "replace_kernel_forward_from_hub", | ||||
|     "LayerRepository", | ||||
|     "Device", | ||||
|     "kernelize", | ||||
| ] | ||||
|  | ||||
| @ -1,200 +0,0 @@ | ||||
| # AVL-balanced interval trees. We could use the intervaltree | ||||
| # packages, but it seems unmaintained and does not have type | ||||
| # annotations. | ||||
|  | ||||
| from typing import Generic, List, Optional, Tuple, TypeVar | ||||
|  | ||||
| T = TypeVar("T") | ||||
|  | ||||
|  | ||||
| class _Node(Generic[T]): | ||||
|     """A node in the interval tree.""" | ||||
|  | ||||
|     def __init__(self, start: int, end: int, data: T): | ||||
|         self.start: int = start | ||||
|         self.end: int = end | ||||
|         self.data: T = data | ||||
|         self.max_end: int = end | ||||
|         self.left: Optional["_Node[T]"] = None | ||||
|         self.right: Optional["_Node[T]"] = None | ||||
|         self.height: int = 1 | ||||
|  | ||||
|     def __repr__(self) -> str: | ||||
|         return f"Node({self.start}, {self.end})" | ||||
|  | ||||
|  | ||||
| class IntervalTree(Generic[T]): | ||||
|     """A data structure to hold and query (unique) intervals.""" | ||||
|  | ||||
|     root: Optional[_Node[T]] | ||||
|  | ||||
|     def __init__(self): | ||||
|         self.root = None | ||||
|  | ||||
|     def insert(self, start: int, end: int, data: T) -> None: | ||||
|         """ | ||||
|         Inserts a new interval into the tree. | ||||
|  | ||||
|         Args: | ||||
|             start: The starting point of the interval. | ||||
|             end: The ending point of the interval. | ||||
|             data: The data associated with this interval. | ||||
|         """ | ||||
|         self.root = self._insert(self.root, start, end, data) | ||||
|  | ||||
|     def _get_height(self, node: Optional[_Node[T]]) -> int: | ||||
|         if not node: | ||||
|             return 0 | ||||
|         return node.height | ||||
|  | ||||
|     def _get_balance(self, node: Optional[_Node[T]]) -> int: | ||||
|         if not node: | ||||
|             return 0 | ||||
|         return self._get_height(node.left) - self._get_height(node.right) | ||||
|  | ||||
|     def _update_node_attributes(self, node: _Node[T]) -> None: | ||||
|         node.height = 1 + max(self._get_height(node.left), self._get_height(node.right)) | ||||
|         node.max_end = node.end | ||||
|         if node.left: | ||||
|             node.max_end = max(node.max_end, node.left.max_end) | ||||
|         if node.right: | ||||
|             node.max_end = max(node.max_end, node.right.max_end) | ||||
|  | ||||
|     def _right_rotate(self, y: _Node[T]) -> _Node[T]: | ||||
|         """Performs a right rotation.""" | ||||
|         x = y.left | ||||
|         assert x is not None | ||||
|         T2 = x.right | ||||
|  | ||||
|         x.right = y | ||||
|         y.left = T2 | ||||
|  | ||||
|         self._update_node_attributes(y) | ||||
|         self._update_node_attributes(x) | ||||
|  | ||||
|         return x | ||||
|  | ||||
|     def _left_rotate(self, x: _Node[T]) -> _Node[T]: | ||||
|         """Performs a left rotation.""" | ||||
|         y = x.right | ||||
|         assert y is not None | ||||
|         T2 = y.left | ||||
|  | ||||
|         y.left = x | ||||
|         x.right = T2 | ||||
|  | ||||
|         self._update_node_attributes(x) | ||||
|         self._update_node_attributes(y) | ||||
|  | ||||
|         return y | ||||
|  | ||||
|     def _insert( | ||||
|         self, node: Optional[_Node[T]], start: int, end: int, data: T | ||||
|     ) -> _Node[T]: | ||||
|         """Recursive helper to insert a new node and balance the tree.""" | ||||
|         if not node: | ||||
|             return _Node(start, end, data) | ||||
|  | ||||
|         # Replace the data if the interval already exists. | ||||
|         if start == node.start and end == node.end: | ||||
|             node.data = data | ||||
|             return node | ||||
|  | ||||
|         if start < node.start: | ||||
|             node.left = self._insert(node.left, start, end, data) | ||||
|         else: | ||||
|             node.right = self._insert(node.right, start, end, data) | ||||
|  | ||||
|         self._update_node_attributes(node) | ||||
|  | ||||
|         balance = self._get_balance(node) | ||||
|  | ||||
|         # Left Left Case | ||||
|         if balance > 1 and node.left and start < node.left.start: | ||||
|             return self._right_rotate(node) | ||||
|  | ||||
|         # Right Right Case | ||||
|         if balance < -1 and node.right and start >= node.right.start: | ||||
|             return self._left_rotate(node) | ||||
|  | ||||
|         # Left Right Case | ||||
|         if balance > 1 and node.left and start >= node.left.start: | ||||
|             node.left = self._left_rotate(node.left) | ||||
|             return self._right_rotate(node) | ||||
|  | ||||
|         # Right Left Case | ||||
|         if balance < -1 and node.right and start < node.right.start: | ||||
|             node.right = self._right_rotate(node.right) | ||||
|             return self._left_rotate(node) | ||||
|  | ||||
|         return node | ||||
|  | ||||
|     def search(self, point: int) -> List[T]: | ||||
|         """ | ||||
|         Searches for all intervals that contain the given point. | ||||
|  | ||||
|         Args: | ||||
|             point: The point to search for. | ||||
|  | ||||
|         Returns: | ||||
|             A list of data items from all matching intervals. | ||||
|         """ | ||||
|         results: List[T] = [] | ||||
|         self._search(self.root, point, results) | ||||
|         return results | ||||
|  | ||||
|     def _search(self, node: Optional[_Node[T]], point: int, results: List[T]) -> None: | ||||
|         """Recursive helper to find all overlapping intervals.""" | ||||
|         if node is None or point > node.max_end: | ||||
|             return | ||||
|  | ||||
|         if node.left: | ||||
|             self._search(node.left, point, results) | ||||
|  | ||||
|         if node.start <= point <= node.end: | ||||
|             results.append(node.data) | ||||
|  | ||||
|         if point >= node.start and node.right: | ||||
|             self._search(node.right, point, results) | ||||
|  | ||||
|     def find_smallest_interval(self, point: int) -> Optional[T]: | ||||
|         """ | ||||
|         Finds the item with the most specific (smallest) range for a given point. | ||||
|  | ||||
|         Args: | ||||
|             point: The capability to look up. | ||||
|  | ||||
|         Returns: | ||||
|             The data of the best-matching item, or None if no match is found. | ||||
|         """ | ||||
|         matches: List[Tuple[int, int, T]] = [] | ||||
|         self._find_with_intervals(self.root, point, matches) | ||||
|  | ||||
|         if not matches: | ||||
|             return None | ||||
|  | ||||
|         # Return the smallest interval, sort by memory location when | ||||
|         # there are multiple matches with the same interval size. This | ||||
|         # is just to ensure that we can compare against a trivial | ||||
|         # implementation in tests. | ||||
|         best_match = min(matches, key=lambda x: (x[1] - x[0], id(x[2]))) | ||||
|         return best_match[2] | ||||
|  | ||||
|     def _find_with_intervals( | ||||
|         self, | ||||
|         node: Optional[_Node[T]], | ||||
|         point: int, | ||||
|         results: List[Tuple[int, int, T]], | ||||
|     ) -> None: | ||||
|         """A modified search that collects interval ranges along with data.""" | ||||
|         if node is None or point > node.max_end: | ||||
|             return | ||||
|  | ||||
|         if node.left: | ||||
|             self._find_with_intervals(node.left, point, results) | ||||
|  | ||||
|         if node.start <= point <= node.end: | ||||
|             results.append((node.start, node.end, node.data)) | ||||
|  | ||||
|         if point >= node.start and node.right: | ||||
|             self._find_with_intervals(node.right, point, results) | ||||
| @ -1,52 +0,0 @@ | ||||
| from typing import Dict, Optional | ||||
|  | ||||
| from huggingface_hub import HfApi | ||||
| from huggingface_hub.hf_api import GitRefInfo | ||||
| from packaging.specifiers import SpecifierSet | ||||
| from packaging.version import InvalidVersion, Version | ||||
|  | ||||
|  | ||||
def _get_available_versions(repo_id: str) -> Dict[Version, GitRefInfo]:
    """Get kernel versions that are available in the repository.

    Versions are derived from Git tags of the form ``v<version>``; tags
    whose suffix is not a valid version are skipped.
    """
    versions = {}
    for tag in HfApi().list_repo_refs(repo_id).tags:
        if not tag.name.startswith("v"):
            continue
        try:
            # Strip the leading `v` and parse the remainder as a version.
            versions[Version(tag.name[1:])] = tag
        except InvalidVersion:
            # Not a version tag (e.g. `vnext`), ignore it.
            continue

    return versions
|  | ||||
|  | ||||
def resolve_version_spec_as_ref(repo_id: str, version_spec: str) -> GitRefInfo:
    """
    Resolve a version specifier to the Git ref of the newest matching version.

    The version specifier can be any valid Python version specifier:
    https://packaging.python.org/en/latest/specifications/version-specifiers/#version-specifiers

    Raises:
        ValueError: If no available version satisfies the specifier.
    """
    versions = _get_available_versions(repo_id)
    requirement = SpecifierSet(version_spec)
    # Newest acceptable version wins.
    accepted_versions = sorted(requirement.filter(versions.keys()))

    if len(accepted_versions) == 0:
        raise ValueError(
            f"No version of `{repo_id}` satisfies requirement: {version_spec}"
        )

    return versions[accepted_versions[-1]]
|  | ||||
|  | ||||
def select_revision_or_version(
    repo_id: str, revision: Optional[str], version: Optional[str]
) -> str:
    """
    Turn a revision/version pair into a concrete revision.

    At most one of `revision` and `version` may be given. When neither is
    given, the `main` branch is used; when a version specifier is given,
    it is resolved to the commit of the newest matching version tag.

    Raises:
        ValueError: If both a revision and a version are specified.
    """
    if revision is not None and version is not None:
        raise ValueError("Either a revision or a version must be specified, not both.")
    elif revision is None and version is None:
        revision = "main"
    elif version is not None:
        revision = resolve_version_spec_as_ref(repo_id, version).target_commit
    assert revision is not None
    return revision
| @ -4,8 +4,6 @@ import json | ||||
| import sys | ||||
| from pathlib import Path | ||||
|  | ||||
| from huggingface_hub import create_repo, upload_folder | ||||
|  | ||||
| from kernels.compat import tomllib | ||||
| from kernels.lockfile import KernelLock, get_kernel_locks | ||||
| from kernels.utils import install_kernel, install_kernel_all_variants | ||||
| @ -33,24 +31,6 @@ def main(): | ||||
|     ) | ||||
|     download_parser.set_defaults(func=download_kernels) | ||||
|  | ||||
|     upload_parser = subparsers.add_parser("upload", help="Upload kernels to the Hub") | ||||
|     upload_parser.add_argument( | ||||
|         "kernel_dir", | ||||
|         type=Path, | ||||
|         help="Directory of the kernel build", | ||||
|     ) | ||||
|     upload_parser.add_argument( | ||||
|         "--repo_id", | ||||
|         type=str, | ||||
|         help="Repository ID to use to upload to the Hugging Face Hub", | ||||
|     ) | ||||
|     upload_parser.add_argument( | ||||
|         "--private", | ||||
|         action="store_true", | ||||
|         help="If the repository should be private.", | ||||
|     ) | ||||
|     upload_parser.set_defaults(func=upload_kernels) | ||||
|  | ||||
|     lock_parser = subparsers.add_parser("lock", help="Lock kernel revisions") | ||||
|     lock_parser.add_argument( | ||||
|         "project_dir", | ||||
| @ -173,33 +153,6 @@ def lock_kernels(args): | ||||
|         json.dump(all_locks, f, cls=_JSONEncoder, indent=2) | ||||
|  | ||||
|  | ||||
def upload_kernels(args):
    """Upload the build variants of a kernel to the Hugging Face Hub.

    Expects `args.kernel_dir` to contain a `build` directory with one
    subdirectory per build variant. The repository is created when it
    does not exist yet.

    Raises:
        ValueError: If `args.kernel_dir` or its `build` subdirectory
            is not a directory.
    """
    kernel_dir = Path(args.kernel_dir).resolve()
    build_dir = kernel_dir / "build"
    if not kernel_dir.is_dir():
        raise ValueError(f"{kernel_dir} is not a directory")
    if not build_dir.is_dir():
        raise ValueError("Couldn't find `build` directory inside `kernel_dir`")

    repo_id = create_repo(
        repo_id=args.repo_id, private=args.private, exist_ok=True
    ).repo_id

    # Delete previously uploaded files of each variant that is re-uploaded,
    # so that no stale files remain in the repository.
    delete_patterns: set[str] = set()
    for build_variant in build_dir.iterdir():
        if build_variant.is_dir():
            delete_patterns.add(f"{build_variant.name}/**")

    upload_folder(
        repo_id=repo_id,
        folder_path=build_dir,
        path_in_repo="build",
        delete_patterns=list(delete_patterns),
        commit_message="Build uploaded using `kernels`.",
    )
    print(f"✅ Kernel upload successful. Find the kernel in https://hf.co/{repo_id}.")
|  | ||||
|  | ||||
| class _JSONEncoder(json.JSONEncoder): | ||||
|     def default(self, o): | ||||
|         if dataclasses.is_dataclass(o): | ||||
|  | ||||
							
								
								
									
										1068
									
								
								src/kernels/layer.py
									
									
									
									
									
								
							
							
						
						
									
										1068
									
								
								src/kernels/layer.py
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -4,8 +4,10 @@ from pathlib import Path | ||||
| from typing import Dict, List, Tuple | ||||
|  | ||||
| from huggingface_hub import HfApi | ||||
| from huggingface_hub.hf_api import GitRefInfo | ||||
| from packaging.specifiers import SpecifierSet | ||||
| from packaging.version import InvalidVersion, Version | ||||
|  | ||||
| from kernels._versions import resolve_version_spec_as_ref | ||||
| from kernels.compat import tomllib | ||||
|  | ||||
|  | ||||
| @ -29,6 +31,20 @@ class KernelLock: | ||||
|         return cls(repo_id=o["repo_id"], sha=o["sha"], variants=variants) | ||||
|  | ||||
|  | ||||
def _get_available_versions(repo_id: str) -> Dict[Version, GitRefInfo]:
    """Get kernel versions that are available in the repository.

    Versions are derived from Git tags of the form ``v<version>``; tags
    whose suffix is not a valid version are skipped.
    """
    versions = {}
    for tag in HfApi().list_repo_refs(repo_id).tags:
        if not tag.name.startswith("v"):
            continue
        try:
            # Strip the leading `v` and parse the remainder as a version.
            versions[Version(tag.name[1:])] = tag
        except InvalidVersion:
            # Not a version tag (e.g. `vnext`), ignore it.
            continue

    return versions
|  | ||||
|  | ||||
| def get_kernel_locks(repo_id: str, version_spec: str) -> KernelLock: | ||||
|     """ | ||||
|     Get the locks for a kernel with the given version spec. | ||||
| @ -36,7 +52,16 @@ def get_kernel_locks(repo_id: str, version_spec: str) -> KernelLock: | ||||
|     The version specifier can be any valid Python version specifier: | ||||
|     https://packaging.python.org/en/latest/specifications/version-specifiers/#version-specifiers | ||||
|     """ | ||||
|     tag_for_newest = resolve_version_spec_as_ref(repo_id, version_spec) | ||||
|     versions = _get_available_versions(repo_id) | ||||
|     requirement = SpecifierSet(version_spec) | ||||
|     accepted_versions = sorted(requirement.filter(versions.keys())) | ||||
|  | ||||
|     if len(accepted_versions) == 0: | ||||
|         raise ValueError( | ||||
|             f"No version of `{repo_id}` satisfies requirement: {version_spec}" | ||||
|         ) | ||||
|  | ||||
|     tag_for_newest = versions[accepted_versions[-1]] | ||||
|  | ||||
|     r = HfApi().repo_info( | ||||
|         repo_id=repo_id, revision=tag_for_newest.target_commit, files_metadata=True | ||||
|  | ||||
| @ -16,7 +16,6 @@ from typing import Dict, List, Optional, Tuple | ||||
| from huggingface_hub import file_exists, snapshot_download | ||||
| from packaging.version import parse | ||||
|  | ||||
| from kernels._versions import select_revision_or_version | ||||
| from kernels.lockfile import KernelLock, VariantLock | ||||
|  | ||||
|  | ||||
| @ -46,12 +45,9 @@ def build_variant() -> str: | ||||
|         compute_framework = f"rocm{rocm_version.major}{rocm_version.minor}" | ||||
|     elif torch.backends.mps.is_available(): | ||||
|         compute_framework = "metal" | ||||
|     elif torch.version.xpu is not None: | ||||
|         version = torch.version.xpu | ||||
|         compute_framework = f"xpu{version[0:4]}{version[5:6]}" | ||||
|     else: | ||||
|         raise AssertionError( | ||||
|             "Torch was not compiled with CUDA, Metal, XPU, or ROCm enabled." | ||||
|             "Torch was not compiled with CUDA, Metal, or ROCm enabled." | ||||
|         ) | ||||
|  | ||||
|     torch_version = parse(torch.__version__) | ||||
| @ -59,7 +55,6 @@ def build_variant() -> str: | ||||
|     os = platform.system().lower() | ||||
|  | ||||
|     if os == "darwin": | ||||
|         cpu = "aarch64" if cpu == "arm64" else cpu | ||||
|         return f"torch{torch_version.major}{torch_version.minor}-{compute_framework}-{cpu}-{os}" | ||||
|  | ||||
|     cxxabi = "cxx11" if torch.compiled_with_cxx11_abi() else "cxx98" | ||||
| @ -99,20 +94,7 @@ def install_kernel( | ||||
|     """ | ||||
|     Download a kernel for the current environment to the cache. | ||||
|  | ||||
|     The output path is validated against the hashes in `variant_locks` when provided. | ||||
|  | ||||
|     Args: | ||||
|         repo_id (`str`): | ||||
|             The Hub repository containing the kernel. | ||||
|         revision (`str`): | ||||
|             The specific revision (branch, tag, or commit) to download. | ||||
|         local_files_only (`bool`, *optional*, defaults to `False`): | ||||
|             Whether to only use local files and not download from the Hub. | ||||
|         variant_locks (`Dict[str, VariantLock]`, *optional*): | ||||
|             Optional dictionary of variant locks for validation. | ||||
|  | ||||
|     Returns: | ||||
|         `Tuple[str, Path]`: A tuple containing the package name and the path to the variant directory. | ||||
|     The output path is validated againt `hash` when set. | ||||
|     """ | ||||
|     package_name = package_name_from_repo_id(repo_id) | ||||
|     variant = build_variant() | ||||
| @ -127,23 +109,6 @@ def install_kernel( | ||||
|         ) | ||||
|     ) | ||||
|  | ||||
|     try: | ||||
|         return _load_kernel_from_path(repo_path, package_name, variant_locks) | ||||
|     except FileNotFoundError: | ||||
|         # Redo with more specific error message. | ||||
|         raise FileNotFoundError( | ||||
|             f"Kernel `{repo_id}` at revision {revision} does not have build: {variant}" | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def _load_kernel_from_path( | ||||
|     repo_path: Path, | ||||
|     package_name: str, | ||||
|     variant_locks: Optional[Dict[str, VariantLock]] = None, | ||||
| ) -> Tuple[str, Path]: | ||||
|     variant = build_variant() | ||||
|     universal_variant = universal_build_variant() | ||||
|  | ||||
|     variant_path = repo_path / "build" / variant | ||||
|     universal_variant_path = repo_path / "build" / universal_variant | ||||
|  | ||||
| @ -162,7 +127,7 @@ def _load_kernel_from_path( | ||||
|  | ||||
|     if not os.path.exists(module_init_path): | ||||
|         raise FileNotFoundError( | ||||
|             f"Kernel at path `{repo_path}` does not have build: {variant}" | ||||
|             f"Kernel `{repo_id}` at revision {revision} does not have build: {variant}" | ||||
|         ) | ||||
|  | ||||
|     return package_name, variant_path | ||||
| @ -199,96 +164,16 @@ def install_kernel_all_variants( | ||||
|     return repo_path / "build" | ||||
|  | ||||
|  | ||||
| def get_kernel( | ||||
|     repo_id: str, revision: Optional[str] = None, version: Optional[str] = None | ||||
| ) -> ModuleType: | ||||
|     """ | ||||
|     Load a kernel from the kernel hub. | ||||
|  | ||||
|     This function downloads a kernel to the local Hugging Face Hub cache directory (if it was not downloaded before) | ||||
|     and then loads the kernel. | ||||
|  | ||||
|     Args: | ||||
|         repo_id (`str`): | ||||
|             The Hub repository containing the kernel. | ||||
|         revision (`str`, *optional*, defaults to `"main"`): | ||||
|             The specific revision (branch, tag, or commit) to download. Cannot be used together with `version`. | ||||
|         version (`str`, *optional*): | ||||
|             The kernel version to download. This can be a Python version specifier, such as `">=1.0.0,<2.0.0"`. | ||||
|             Cannot be used together with `revision`. | ||||
|  | ||||
|     Returns: | ||||
|         `ModuleType`: The imported kernel module. | ||||
|  | ||||
|     Example: | ||||
|         ```python | ||||
|         import torch | ||||
|         from kernels import get_kernel | ||||
|  | ||||
|         activation = get_kernel("kernels-community/activation") | ||||
|         x = torch.randn(10, 20, device="cuda") | ||||
|         out = torch.empty_like(x) | ||||
|         result = activation.silu_and_mul(out, x) | ||||
|         ``` | ||||
|     """ | ||||
|     revision = select_revision_or_version(repo_id, revision, version) | ||||
| def get_kernel(repo_id: str, revision: str = "main") -> ModuleType: | ||||
|     package_name, package_path = install_kernel(repo_id, revision=revision) | ||||
|     return import_from_path(package_name, package_path / package_name / "__init__.py") | ||||
|  | ||||
|  | ||||
| def get_local_kernel(repo_path: Path, package_name: str) -> ModuleType: | ||||
| def has_kernel(repo_id: str, revision: str = "main") -> bool: | ||||
|     """ | ||||
|     Import a kernel from a local kernel repository path. | ||||
|  | ||||
|     Args: | ||||
|         repo_path (`Path`): | ||||
|             The local path to the kernel repository. | ||||
|         package_name (`str`): | ||||
|             The name of the package to import from the repository. | ||||
|  | ||||
|     Returns: | ||||
|         `ModuleType`: The imported kernel module. | ||||
|     Check whether a kernel build exists for the current environment | ||||
|     (Torch version and compute framework). | ||||
|     """ | ||||
|     variant = build_variant() | ||||
|     universal_variant = universal_build_variant() | ||||
|  | ||||
|     # Presume we were given the top level path of the kernel repository. | ||||
|     for base_path in [repo_path, repo_path / "build"]: | ||||
|         # Prefer the universal variant if it exists. | ||||
|         for v in [universal_variant, variant]: | ||||
|             package_path = base_path / v / package_name / "__init__.py" | ||||
|             if package_path.exists(): | ||||
|                 return import_from_path(package_name, package_path) | ||||
|  | ||||
|     # If we didn't find the package in the repo we may have a explicit | ||||
|     # package path. | ||||
|     package_path = repo_path / package_name / "__init__.py" | ||||
|     if package_path.exists(): | ||||
|         return import_from_path(package_name, package_path) | ||||
|  | ||||
|     raise FileNotFoundError(f"Could not find package '{package_name}' in {repo_path}") | ||||
|  | ||||
|  | ||||
| def has_kernel( | ||||
|     repo_id: str, revision: Optional[str] = None, version: Optional[str] = None | ||||
| ) -> bool: | ||||
|     """ | ||||
|     Check whether a kernel build exists for the current environment (Torch version and compute framework). | ||||
|  | ||||
|     Args: | ||||
|         repo_id (`str`): | ||||
|             The Hub repository containing the kernel. | ||||
|         revision (`str`, *optional*, defaults to `"main"`): | ||||
|             The specific revision (branch, tag, or commit) to download. Cannot be used together with `version`. | ||||
|         version (`str`, *optional*): | ||||
|             The kernel version to download. This can be a Python version specifier, such as `">=1.0.0,<2.0.0"`. | ||||
|             Cannot be used together with `revision`. | ||||
|  | ||||
|     Returns: | ||||
|         `bool`: `True` if a kernel is available for the current environment. | ||||
|     """ | ||||
|     revision = select_revision_or_version(repo_id, revision, version) | ||||
|  | ||||
|     package_name = package_name_from_repo_id(repo_id) | ||||
|     variant = build_variant() | ||||
|     universal_variant = universal_build_variant() | ||||
| @ -311,16 +196,8 @@ def load_kernel(repo_id: str, *, lockfile: Optional[Path] = None) -> ModuleType: | ||||
|     """ | ||||
|     Get a pre-downloaded, locked kernel. | ||||
|  | ||||
|     If `lockfile` is not specified, the lockfile will be loaded from the caller's package metadata. | ||||
|  | ||||
|     Args: | ||||
|         repo_id (`str`): | ||||
|             The Hub repository containing the kernel. | ||||
|         lockfile (`Path`, *optional*): | ||||
|             Path to the lockfile. If not provided, the lockfile will be loaded from the caller's package metadata. | ||||
|  | ||||
|     Returns: | ||||
|         `ModuleType`: The imported kernel module. | ||||
|     If `lockfile` is not specified, the lockfile will be loaded from the | ||||
|     caller's package metadata. | ||||
|     """ | ||||
|     if lockfile is None: | ||||
|         locked_sha = _get_caller_locked_kernel(repo_id) | ||||
| @ -365,18 +242,7 @@ def load_kernel(repo_id: str, *, lockfile: Optional[Path] = None) -> ModuleType: | ||||
|  | ||||
|  | ||||
| def get_locked_kernel(repo_id: str, local_files_only: bool = False) -> ModuleType: | ||||
|     """ | ||||
|     Get a kernel using a lock file. | ||||
|  | ||||
|     Args: | ||||
|         repo_id (`str`): | ||||
|             The Hub repository containing the kernel. | ||||
|         local_files_only (`bool`, *optional*, defaults to `False`): | ||||
|             Whether to only use local files and not download from the Hub. | ||||
|  | ||||
|     Returns: | ||||
|         `ModuleType`: The imported kernel module. | ||||
|     """ | ||||
|     """Get a kernel using a lock file.""" | ||||
|     locked_sha = _get_caller_locked_kernel(repo_id) | ||||
|  | ||||
|     if locked_sha is None: | ||||
|  | ||||
| @ -1,41 +1,10 @@ | ||||
| import sys | ||||
|  | ||||
| import pytest | ||||
| import torch | ||||
|  | ||||
| has_cuda = ( | ||||
|     hasattr(torch.version, "cuda") | ||||
|     and torch.version.cuda is not None | ||||
|     and torch.cuda.device_count() > 0 | ||||
| ) | ||||
| has_rocm = ( | ||||
|     hasattr(torch.version, "hip") | ||||
|     and torch.version.hip is not None | ||||
|     and torch.cuda.device_count() > 0 | ||||
| ) | ||||
| has_xpu = ( | ||||
|     hasattr(torch.version, "xpu") | ||||
|     and torch.version.xpu is not None | ||||
|     and torch.xpu.device_count() > 0 | ||||
| ) | ||||
|  | ||||
|  | ||||
| def pytest_addoption(parser): | ||||
|     parser.addoption( | ||||
|         "--token", | ||||
|         action="store_true", | ||||
|         help="run tests that require a token with write permissions", | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def pytest_runtest_setup(item): | ||||
|     if "cuda_only" in item.keywords and not has_cuda: | ||||
|         pytest.skip("skipping CUDA-only test on host without CUDA") | ||||
|     if "rocm_only" in item.keywords and not has_rocm: | ||||
|         pytest.skip("skipping ROCm-only test on host without ROCm") | ||||
|     if "linux_only" in item.keywords and not sys.platform.startswith("linux"): | ||||
|         pytest.skip("skipping Linux-only test on non-Linux platform") | ||||
|     if "darwin_only" in item.keywords and not sys.platform.startswith("darwin"): | ||||
|         pytest.skip("skipping macOS-only test on non-macOS platform") | ||||
|     if "xpu_only" in item.keywords and not has_xpu: | ||||
|         pytest.skip("skipping XPU-only test on host without XPU") | ||||
|     if "token" in item.keywords and not item.config.getoption("--token"): | ||||
|         pytest.skip("need --token option to run this test") | ||||
|  | ||||
| @ -1,12 +0,0 @@ | ||||
| [ | ||||
|   { | ||||
|     "repo_id": "kernels-test/versions", | ||||
|     "sha": "dc142fd6c9920c993d32be6358b78957c58681c3", | ||||
|     "variants": { | ||||
|       "torch-universal": { | ||||
|         "hash": "sha256-35ce0ccfe68e392cbc06feef72268f4c41a74b9920496a2c6ee8978db7f7c17c", | ||||
|         "hash_type": "git_lfs_concat" | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| ] | ||||
| @ -1,2 +0,0 @@ | ||||
| [tool.kernels.dependencies] | ||||
| "kernels-test/versions" = ">=0.1.0,<0.2.0" | ||||
| @ -1,7 +1,7 @@ | ||||
| import pytest | ||||
| import torch | ||||
|  | ||||
| from kernels import get_kernel, get_local_kernel, has_kernel, install_kernel | ||||
| from kernels import get_kernel, has_kernel | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| @ -9,20 +9,6 @@ def kernel(): | ||||
|     return get_kernel("kernels-community/activation") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def local_kernel_path(): | ||||
|     package_name, path = install_kernel("kernels-community/activation", "main") | ||||
|     # Path is the build variant path (build/torch-<...>), so the grandparent | ||||
|     # is the kernel repository path. | ||||
|     return package_name, path | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def local_kernel(local_kernel_path): | ||||
|     package_name, path = local_kernel_path | ||||
|     return get_local_kernel(path.parent.parent, package_name) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def metal_kernel(): | ||||
|     return get_kernel("kernels-test/relu-metal") | ||||
| @ -40,7 +26,7 @@ def device(): | ||||
|     return "cuda" | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.linux_only | ||||
| def test_gelu_fast(kernel, device): | ||||
|     x = torch.arange(1, 10, dtype=torch.float16, device=device).view(3, 3) | ||||
|     y = torch.empty_like(x) | ||||
| @ -56,55 +42,6 @@ def test_gelu_fast(kernel, device): | ||||
|     assert torch.allclose(y, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| def test_local_kernel(local_kernel, device): | ||||
|     x = torch.arange(1, 10, dtype=torch.float16, device=device).view(3, 3) | ||||
|     y = torch.empty_like(x) | ||||
|  | ||||
|     local_kernel.gelu_fast(y, x) | ||||
|  | ||||
|     expected = torch.tensor( | ||||
|         [[0.8408, 1.9551, 2.9961], [4.0000, 5.0000, 6.0000], [7.0000, 8.0000, 9.0000]], | ||||
|         device=device, | ||||
|         dtype=torch.float16, | ||||
|     ) | ||||
|  | ||||
|     assert torch.allclose(y, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| def test_local_kernel_path_types(local_kernel_path, device): | ||||
|     package_name, path = local_kernel_path | ||||
|  | ||||
|     # Top-level repo path | ||||
|     # ie: /home/ubuntu/.cache/huggingface/hub/models--kernels-community--activation/snapshots/2fafa6a3a38ccb57a1a98419047cf7816ecbc071 | ||||
|     kernel = get_local_kernel(path.parent.parent, package_name) | ||||
|     x = torch.arange(1, 10, dtype=torch.float16, device=device).view(3, 3) | ||||
|     y = torch.empty_like(x) | ||||
|  | ||||
|     kernel.gelu_fast(y, x) | ||||
|     expected = torch.tensor( | ||||
|         [[0.8408, 1.9551, 2.9961], [4.0000, 5.0000, 6.0000], [7.0000, 8.0000, 9.0000]], | ||||
|         device=device, | ||||
|         dtype=torch.float16, | ||||
|     ) | ||||
|     assert torch.allclose(y, expected) | ||||
|  | ||||
|     # Build directory path | ||||
|     # ie: /home/ubuntu/.cache/huggingface/hub/models--kernels-community--activation/snapshots/2fafa6a3a38ccb57a1a98419047cf7816ecbc071/build | ||||
|     kernel = get_local_kernel(path.parent.parent / "build", package_name) | ||||
|     y = torch.empty_like(x) | ||||
|     kernel.gelu_fast(y, x) | ||||
|     assert torch.allclose(y, expected) | ||||
|  | ||||
|     # Explicit package path | ||||
|     # ie: /home/ubuntu/.cache/huggingface/hub/models--kernels-community--activation/snapshots/2fafa6a3a38ccb57a1a98419047cf7816ecbc071/build/torch28-cxx11-cu128-x86_64-linux | ||||
|     kernel = get_local_kernel(path, package_name) | ||||
|     y = torch.empty_like(x) | ||||
|     kernel.gelu_fast(y, x) | ||||
|     assert torch.allclose(y, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.darwin_only | ||||
| @pytest.mark.parametrize("dtype", [torch.float16, torch.float32]) | ||||
| def test_relu_metal(metal_kernel, dtype): | ||||
| @ -113,7 +50,7 @@ def test_relu_metal(metal_kernel, dtype): | ||||
|     assert torch.allclose(y, torch.relu(x)) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.linux_only | ||||
| @pytest.mark.parametrize( | ||||
|     "kernel_exists", | ||||
|     [ | ||||
| @ -130,26 +67,7 @@ def test_has_kernel(kernel_exists): | ||||
|     assert has_kernel(repo_id, revision=revision) == kernel | ||||
|  | ||||
|  | ||||
| def test_version(): | ||||
|     kernel = get_kernel("kernels-test/versions") | ||||
|     assert kernel.version() == "0.2.0" | ||||
|     kernel = get_kernel("kernels-test/versions", version="<1.0.0") | ||||
|     assert kernel.version() == "0.2.0" | ||||
|     kernel = get_kernel("kernels-test/versions", version="<0.2.0") | ||||
|     assert kernel.version() == "0.1.1" | ||||
|     kernel = get_kernel("kernels-test/versions", version=">0.1.0,<0.2.0") | ||||
|     assert kernel.version() == "0.1.1" | ||||
|  | ||||
|     with pytest.raises(ValueError, match=r"No version.*satisfies requirement"): | ||||
|         get_kernel("kernels-test/versions", version=">0.2.0") | ||||
|  | ||||
|     with pytest.raises(ValueError, match=r"Either a revision or a version.*not both"): | ||||
|         kernel = get_kernel( | ||||
|             "kernels-test/versions", revision="v0.1.0", version="<1.0.0" | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.linux_only | ||||
| def test_universal_kernel(universal_kernel): | ||||
|     torch.manual_seed(0) | ||||
|     A = torch.randint(-10, 10, (64, 128), dtype=torch.int8, device="cuda") | ||||
|  | ||||
| @ -16,21 +16,21 @@ def device(): | ||||
|     return "cuda" | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.linux_only | ||||
| def test_gelu_small(kernel, device, benchmark): | ||||
|     x = torch.randn(32, 32, dtype=torch.float16, device=device) | ||||
|     y = torch.empty_like(x) | ||||
|     benchmark(kernel.gelu_fast, y, x) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.linux_only | ||||
| def test_gelu_medium(kernel, device, benchmark): | ||||
|     x = torch.randn(128, 128, dtype=torch.float16, device=device) | ||||
|     y = torch.empty_like(x) | ||||
|     benchmark(kernel.gelu_fast, y, x) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.linux_only | ||||
| def test_gelu_large(kernel, device, benchmark): | ||||
|     x = torch.randn(512, 512, dtype=torch.float16, device=device) | ||||
|     y = torch.empty_like(x) | ||||
|  | ||||
| @ -1,49 +0,0 @@ | ||||
| import inspect | ||||
|  | ||||
| import pytest | ||||
| from mktestdocs import check_docstring, get_codeblock_members | ||||
|  | ||||
| import kernels | ||||
|  | ||||
|  | ||||
| def all_public_functions(): | ||||
|     function_list = inspect.getmembers(kernels, inspect.isfunction) | ||||
|     return [func for _, func in function_list] | ||||
|  | ||||
|  | ||||
| def all_public_classes(): | ||||
|     class_list = inspect.getmembers(kernels, inspect.isclass) | ||||
|     return [cls for _, cls in class_list] | ||||
|  | ||||
|  | ||||
| def all_public_class_members(): | ||||
|     members = get_codeblock_members(*all_public_classes()) | ||||
|     return members | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.parametrize( | ||||
|     "func", | ||||
|     all_public_functions(), | ||||
|     ids=lambda d: d.__name__, | ||||
| ) | ||||
| def test_func_docstring(func): | ||||
|     check_docstring(obj=func) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.parametrize( | ||||
|     "cls", | ||||
|     all_public_classes(), | ||||
|     ids=lambda d: d.__name__, | ||||
| ) | ||||
| def test_class_docstring(cls): | ||||
|     check_docstring(obj=cls) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.parametrize( | ||||
|     "member", all_public_class_members(), ids=lambda d: d.__qualname__ | ||||
| ) | ||||
| def test_member_docstring(member): | ||||
|     check_docstring(member) | ||||
| @ -1,230 +0,0 @@ | ||||
| import random | ||||
| from typing import Generic, List, Optional, Tuple, TypeVar | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from kernels._interval_tree import IntervalTree, _Node | ||||
|  | ||||
| T = TypeVar("T") | ||||
|  | ||||
|  | ||||
| class SimpleIntervalStore(Generic[T]): | ||||
|     """A simple O(n) implementation that stores intervals in a list.""" | ||||
|  | ||||
|     def __init__(self): | ||||
|         self.intervals: List[Tuple[int, int, T]] = [] | ||||
|  | ||||
|     def insert(self, start: int, end: int, data: T) -> None: | ||||
|         """Insert an interval into the store.""" | ||||
|         # Replace data if the interval already exists. | ||||
|         for i, (existing_start, existing_end, existing_data) in enumerate( | ||||
|             self.intervals | ||||
|         ): | ||||
|             if existing_start == start and existing_end == end: | ||||
|                 self.intervals[i] = (start, end, data) | ||||
|                 return | ||||
|  | ||||
|         self.intervals.append((start, end, data)) | ||||
|  | ||||
|     def find_smallest_interval(self, point: int) -> Optional[T]: | ||||
|         """Find the best match using linear search.""" | ||||
|         matches = [] | ||||
|         for start, end, data in self.intervals: | ||||
|             if start <= point <= end: | ||||
|                 matches.append((start, end, data)) | ||||
|  | ||||
|         if not matches: | ||||
|             return None | ||||
|  | ||||
|         # Return the smallest interval, sort by memory location when | ||||
|         # there are multiple matches with the same interval size. This | ||||
|         # mirrors the ordering in the intervan tree. | ||||
|         best_match = min(matches, key=lambda x: (x[1] - x[0], id(x[2]))) | ||||
|         return best_match[2] | ||||
|  | ||||
|  | ||||
| def is_balanced(tree: IntervalTree[T]) -> bool: | ||||
|     """Check if the AVL tree is properly balanced.""" | ||||
|  | ||||
|     def check_balance(node: Optional[_Node[T]]) -> Tuple[bool, int]: | ||||
|         if node is None: | ||||
|             return True, 0 | ||||
|  | ||||
|         # Left and right subtrees should be balanced. | ||||
|         left_balanced, left_height = check_balance(node.left) | ||||
|         if not left_balanced: | ||||
|             return False, -1 | ||||
|  | ||||
|         right_balanced, right_height = check_balance(node.right) | ||||
|         if not right_balanced: | ||||
|             return False, -1 | ||||
|  | ||||
|         # The difference in height should not exceed 1. | ||||
|         if abs(left_height - right_height) > 1: | ||||
|             return False, -1 | ||||
|  | ||||
|         # Check if the height is correct. | ||||
|         expected_height = 1 + max(left_height, right_height) | ||||
|         if node.height != expected_height: | ||||
|             return False, -1 | ||||
|  | ||||
|         return True, expected_height | ||||
|  | ||||
|     balanced, _ = check_balance(tree.root) | ||||
|     return balanced | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def populated_tree() -> IntervalTree[str]: | ||||
|     """Provides a pre-populated IntervalTree for testing.""" | ||||
|     tree = IntervalTree[str]() | ||||
|     kernels = [ | ||||
|         (80, 89, "Kernel_A_General_80_89"), | ||||
|         (86, 89, "Kernel_B_Ampere_86_89"), | ||||
|         (80, 86, "Kernel_C_Older_Ampere_80_86"), | ||||
|         (70, 75, "Kernel_D_Volta_70_75"), | ||||
|         (86, 87, "Kernel_E_Specific_86_87"), | ||||
|     ] | ||||
|     for start, end, name in kernels: | ||||
|         tree.insert(start, end, name) | ||||
|     return tree | ||||
|  | ||||
|  | ||||
| def test_find_smallest_interval_match_with_multiple_overlaps(populated_tree): | ||||
|     # Check that the smallest inteval is selected when there are | ||||
|     # multiple matching intervals. | ||||
|     assert populated_tree.find_smallest_interval(86) == "Kernel_E_Specific_86_87" | ||||
|  | ||||
|  | ||||
| def test_find_single_match(populated_tree): | ||||
|     assert populated_tree.find_smallest_interval(72) == "Kernel_D_Volta_70_75" | ||||
|     assert populated_tree.find_smallest_interval(75) == "Kernel_D_Volta_70_75" | ||||
|  | ||||
|  | ||||
| def test_no_match_outside_all_ranges(populated_tree): | ||||
|     # Check that no interval is found when the value is out of range | ||||
|     # (too small/too large). | ||||
|     assert populated_tree.find_smallest_interval(65) is None | ||||
|     assert populated_tree.find_smallest_interval(95) is None | ||||
|  | ||||
|  | ||||
| def test_no_match_in_gap_between_ranges(populated_tree): | ||||
|     # Check that no interval is found when the value is between two | ||||
|     # intervals. | ||||
|     assert populated_tree.find_smallest_interval(78) is None | ||||
|  | ||||
|  | ||||
| def test_boundary_conditions_start_and_end(populated_tree): | ||||
|     # Test exact upper/lower bounds of intervals. | ||||
|     assert populated_tree.find_smallest_interval(80) == "Kernel_C_Older_Ampere_80_86" | ||||
|     assert populated_tree.find_smallest_interval(89) == "Kernel_B_Ampere_86_89" | ||||
|  | ||||
|  | ||||
| def test_empty_tree(): | ||||
|     # Searching in an empty tree should return None. | ||||
|     empty_tree = IntervalTree[str]() | ||||
|     assert empty_tree.find_smallest_interval(100) is None | ||||
|  | ||||
|  | ||||
| def test_multiple_equally_specific_matches(): | ||||
|     # Check that we pick the match in a stable way when there is are | ||||
|     # multiple matching intervals with the same size. | ||||
|     tree = IntervalTree[str]() | ||||
|     str1 = "First_Narrow_Kernel" | ||||
|     str2 = "Second_Narrow_Kernel" | ||||
|     tree.insert(10, 20, "Wide_Kernel") | ||||
|     tree.insert(12, 17, str1) | ||||
|     tree.insert(14, 19, str2) | ||||
|  | ||||
|     if id(str1) < id(str2): | ||||
|         assert tree.find_smallest_interval(15) == str1 | ||||
|     else: | ||||
|         assert tree.find_smallest_interval(15) == str2 | ||||
|  | ||||
|  | ||||
| def test_property_based_interval_tree(): | ||||
|     # Quick-check property-based testing: | ||||
|     # | ||||
|     # - Verify that the tree is balanced after each insertion. | ||||
|     # - Verify the query against a simple list-based implementation. | ||||
|  | ||||
|     random.seed(42)  # For reproducible tests | ||||
|  | ||||
|     test_points = list(range(0, 101)) | ||||
|  | ||||
|     for _ in range(5): | ||||
|         tree = IntervalTree[str]() | ||||
|         simple = SimpleIntervalStore[str]() | ||||
|  | ||||
|         intervals = [] | ||||
|         for i in range(100): | ||||
|             start = random.randint(0, 90) | ||||
|             end = random.randint(start, 100) | ||||
|             data = f"interval_{i}_s{start}_e{end}" | ||||
|             intervals.append((start, end, data)) | ||||
|  | ||||
|         for i, (start, end, data) in enumerate(intervals): | ||||
|             tree.insert(start, end, data) | ||||
|             simple.insert(start, end, data) | ||||
|  | ||||
|             # Check that tree is still balanced | ||||
|             assert is_balanced( | ||||
|                 tree | ||||
|             ), f"Tree became unbalanced after inserting interval {i}: ({start}, {end})" | ||||
|  | ||||
|             for point in test_points: | ||||
|                 tree_result = tree.find_smallest_interval(point) | ||||
|                 simple_result = simple.find_smallest_interval(point) | ||||
|  | ||||
|                 assert tree_result == simple_result, ( | ||||
|                     f"Mismatch for point {point} after inserting {i+1} intervals. " | ||||
|                     f"Tree: {tree_result}, Simple: {simple_result}. " | ||||
|                     f"Last inserted: ({start}, {end})" | ||||
|                 ) | ||||
|  | ||||
|  | ||||
| def test_property_based_edge_cases(): | ||||
|     random.seed(123) | ||||
|  | ||||
|     tree = IntervalTree[str]() | ||||
|     simple = SimpleIntervalStore[str]() | ||||
|  | ||||
|     # Single-point intervals. | ||||
|     for i in range(10): | ||||
|         point = random.randint(0, 100) | ||||
|         data = f"single_point_{i}_{point}" | ||||
|         tree.insert(point, point, data) | ||||
|         simple.insert(point, point, data) | ||||
|  | ||||
|         assert is_balanced( | ||||
|             tree | ||||
|         ), f"Tree unbalanced after inserting single point {point}" | ||||
|  | ||||
|         # Test the exact point and neighbors | ||||
|         for test_point in [point - 1, point, point + 1]: | ||||
|             if 0 <= test_point <= 100: | ||||
|                 tree_result = tree.find_smallest_interval(test_point) | ||||
|                 simple_result = simple.find_smallest_interval(test_point) | ||||
|                 assert tree_result == simple_result | ||||
|  | ||||
|  | ||||
| def test_unique_intervals_override(): | ||||
|     """Test that inserting an interval with the same start/end overrides the previous value.""" | ||||
|     tree = IntervalTree[str]() | ||||
|  | ||||
|     tree.insert(10, 20, "original_value") | ||||
|     assert tree.find_smallest_interval(15) == "original_value" | ||||
|  | ||||
|     tree.insert(10, 20, "new_value") | ||||
|     assert tree.find_smallest_interval(15) == "new_value" | ||||
|  | ||||
|     tree.insert(10, 25, "different_interval") | ||||
|     results = tree.search(15) | ||||
|     assert "new_value" in results | ||||
|     assert "different_interval" in results | ||||
|     assert len(results) == 2 | ||||
|  | ||||
|     tree.insert(10, 20, "final_value") | ||||
|     assert tree.find_smallest_interval(15) == "final_value" | ||||
|  | ||||
|     assert is_balanced(tree) | ||||
| @ -2,17 +2,9 @@ from dataclasses import dataclass | ||||
| from pathlib import Path | ||||
|  | ||||
| import pytest | ||||
| import torch.nn as nn | ||||
|  | ||||
| from kernels import load_kernel | ||||
| from kernels.cli import download_kernels | ||||
| from kernels.layer import ( | ||||
|     LockedLayerRepository, | ||||
|     Mode, | ||||
|     kernelize, | ||||
|     use_kernel_forward_from_hub, | ||||
|     use_kernel_mapping, | ||||
| ) | ||||
|  | ||||
|  | ||||
| # Mock download arguments class. | ||||
| @ -27,34 +19,9 @@ def test_download_all_hash_validation(): | ||||
|     download_kernels(DownloadArgs(all_variants=True, project_dir=project_dir)) | ||||
|  | ||||
|  | ||||
| @pytest.mark.cuda_only | ||||
| @pytest.mark.linux_only | ||||
| def test_load_locked(): | ||||
|     project_dir = Path(__file__).parent / "kernel_locking" | ||||
|     # Also validates that hashing works correctly. | ||||
|     download_kernels(DownloadArgs(all_variants=False, project_dir=project_dir)) | ||||
|     load_kernel("kernels-community/activation", lockfile=project_dir / "kernels.lock") | ||||
|  | ||||
|  | ||||
| def test_layer_locked(): | ||||
|     project_dir = Path(__file__).parent / "layer_locking" | ||||
|  | ||||
|     @use_kernel_forward_from_hub("Version") | ||||
|     class Version(nn.Module): | ||||
|         def forward(self) -> str: | ||||
|             return "0.0.0" | ||||
|  | ||||
|     version = Version() | ||||
|  | ||||
|     with use_kernel_mapping( | ||||
|         { | ||||
|             "Version": { | ||||
|                 "cuda": LockedLayerRepository( | ||||
|                     repo_id="kernels-test/versions", | ||||
|                     layer_name="Version", | ||||
|                     lockfile=project_dir / "kernels.lock", | ||||
|                 ) | ||||
|             }, | ||||
|         } | ||||
|     ): | ||||
|         version = kernelize(version, device="cuda", mode=Mode.INFERENCE) | ||||
|         assert version() == "0.1.1" | ||||
|  | ||||
| @ -1,88 +0,0 @@ | ||||
| import logging | ||||
| import os | ||||
| import re | ||||
| import tempfile | ||||
| from dataclasses import dataclass | ||||
| from pathlib import Path | ||||
| from typing import List | ||||
|  | ||||
| import pytest | ||||
| from huggingface_hub import model_info | ||||
|  | ||||
| from kernels.cli import upload_kernels | ||||
|  | ||||
| REPO_ID = "kernels-test/kernels-upload-test" | ||||
|  | ||||
| PY_CONTENT = """\ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| def main(): | ||||
|     print("Hello from torch-universal!") | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
| """ | ||||
|  | ||||
|  | ||||
| @dataclass | ||||
| class UploadArgs: | ||||
|     kernel_dir: None | ||||
|     repo_id: None | ||||
|     private: False | ||||
|  | ||||
|  | ||||
| def next_filename(path: Path) -> Path: | ||||
|     """ | ||||
|     Given a path like foo_2050.py, return foo_2051.py. | ||||
|     """ | ||||
|     m = re.match(r"^(.*?)(\d+)(\.py)$", path.name) | ||||
|     if not m: | ||||
|         raise ValueError( | ||||
|             f"Filename {path.name!r} does not match pattern <prefix>_<number>.py" | ||||
|         ) | ||||
|  | ||||
|     prefix, number, suffix = m.groups() | ||||
|     new_number = str(int(number) + 1).zfill(len(number)) | ||||
|     return path.with_name(f"{prefix}{new_number}{suffix}") | ||||
|  | ||||
|  | ||||
| def get_filename_to_change(repo_filenames): | ||||
|     for f in repo_filenames: | ||||
|         if "foo" in f and f.endswith(".py"): | ||||
|             filename_to_change = os.path.basename(f) | ||||
|             break | ||||
|     assert filename_to_change | ||||
|     return filename_to_change | ||||
|  | ||||
|  | ||||
| def get_filenames_from_a_repo(repo_id: str) -> List[str]: | ||||
|     try: | ||||
|         repo_info = model_info(repo_id=repo_id, files_metadata=True) | ||||
|         repo_siblings = repo_info.siblings | ||||
|         if repo_siblings is not None: | ||||
|             return [f.rfilename for f in repo_siblings] | ||||
|         else: | ||||
|             raise ValueError("No repo siblings found.") | ||||
|     except Exception as e: | ||||
|         logging.error(f"Error connecting to the Hub: {e}.") | ||||
|  | ||||
|  | ||||
| @pytest.mark.token | ||||
| def test_kernel_upload_deletes_as_expected(): | ||||
|     repo_filenames = get_filenames_from_a_repo(REPO_ID) | ||||
|     filename_to_change = get_filename_to_change(repo_filenames) | ||||
|  | ||||
|     with tempfile.TemporaryDirectory() as tmpdir: | ||||
|         path = f"{tmpdir}/build/torch-universal/upload_test" | ||||
|         build_dir = Path(path) | ||||
|         build_dir.mkdir(parents=True, exist_ok=True) | ||||
|         changed_filename = next_filename(Path(filename_to_change)) | ||||
|         script_path = build_dir / changed_filename | ||||
|         script_path.write_text(PY_CONTENT) | ||||
|         upload_kernels(UploadArgs(tmpdir, REPO_ID, False)) | ||||
|  | ||||
|     repo_filenames = get_filenames_from_a_repo(REPO_ID) | ||||
|     assert any(str(changed_filename) in k for k in repo_filenames), f"{repo_filenames=}" | ||||
|     assert not any( | ||||
|         str(filename_to_change) in k for k in repo_filenames | ||||
|     ), f"{repo_filenames=}" | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user
	