	[nativert] libtorch kernel registry (#157150)
Summary: att

Test Plan: ci

Rollback Plan:

Differential Revision: D77451703

Pull Request resolved: https://github.com/pytorch/pytorch/pull/157150
Approved by: https://github.com/georgiaphillips, https://github.com/henryoier
build_variables.bzl

@@ -625,6 +625,10 @@ libtorch_nativert_sources = [
     "torch/nativert/executor/memory/AliasAnalyzer.cpp",
     "torch/nativert/executor/memory/LayoutPlanner.cpp",
     "torch/nativert/executor/memory/LayoutManager.cpp",
+    "torch/nativert/kernels/KernelRegistry.cpp",
+    "torch/nativert/kernels/NativeKernels.cpp",
+    "torch/nativert/kernels/GeneratedStaticDispatchKernels.cpp",
+    "torch/nativert/kernels/GeneratedNativeStaticDispatchKernels.cpp",
 ]
 
 torch_mobile_tracer_sources = [
c10/core/impl/SizesAndStrides.h

@@ -64,6 +64,10 @@ class C10_API SizesAndStrides {
                   storageBytes(size_)));
   }
 
+  bool operator!=(const SizesAndStrides& other) const {
+    return !(*this == other);
+  }
+
   SizesAndStrides& operator=(const SizesAndStrides& rhs) {
     if (this == &rhs) {
       return *this;
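The new operator!= just negates the existing operator==, giving callers a direct inequality check on a tensor's size/stride layout. A minimal usage sketch (illustrative only; the shapes and helper function are made up, not from the patch):

#include <c10/core/impl/SizesAndStrides.h>

// Illustrative only: the added operator!= delegates to !(a == b).
bool layouts_differ() {
  c10::impl::SizesAndStrides a;
  c10::impl::SizesAndStrides b;
  a.set_sizes({2, 3});  // example shapes for the sketch
  b.set_sizes({2, 4});
  return a != b;        // true: sizes differ
}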
							
								
								
									
torch/nativert/kernels/KernelRegistry.cpp  (new file, 1380 lines)
File diff suppressed because it is too large
							
								
								
									
torch/nativert/kernels/KernelRegistry.h  (new file, 122 lines)

@@ -0,0 +1,122 @@
+#pragma once
+
+#include <torch/nativert/executor/OpKernel.h>
+#include <torch/nativert/graph/Graph.h>
+#include <torch/nativert/kernels/PrimKernelRegistry.h>
+
+namespace torch::nativert {
+
+TORCH_DECLARE_REGISTRY(
+    StaticallyDispatchedCPUKernelRegistry,
+    OpKernel,
+    const Node*,
+    c10::Device);
+
+#define REGISTER_CPU_KERNEL(name, id, ...)                                \
+  class OpKernel_##id : public C10Kernel {                                \
+   public:                                                                \
+    OpKernel_##id(const Node* node, c10::Device device)                   \
+        : C10Kernel(                                                      \
+              node,                                                       \
+              device,                                                     \
+              torch::nativert::OpKernelKind::kStaticDispatchKernel) {}    \
+    void computeInternal(torch::nativert::ExecutionFrame& executionFrame) \
+        const override final {                                            \
+      __VA_ARGS__;                                                        \
+    }                                                                     \
+  };                                                                      \
+  C10_REGISTER_TYPED_CLASS(                                               \
+      StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
+
+#define ALIASING_SPEC(...) __VA_ARGS__
+
+#define REGISTER_ALIASING_CPU_KERNEL(name, id, aliasing_spec, ...)        \
+  class OpKernel_##id : public C10Kernel {                                \
+   public:                                                                \
+    OpKernel_##id(const Node* node, c10::Device device)                   \
+        : C10Kernel(                                                      \
+              node,                                                       \
+              device,                                                     \
+              torch::nativert::OpKernelKind::kNativeStaticDispatchKernel, \
+              aliasing_spec) {}                                           \
+    void computeInternal(torch::nativert::ExecutionFrame& executionFrame) \
+        const override final {                                            \
+      __VA_ARGS__;                                                        \
+    }                                                                     \
+  };                                                                      \
+  C10_REGISTER_TYPED_CLASS(                                               \
+      StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
+
+#define REGISTER_NATIVE_CPU_KERNEL(name, id, ...)                            \
+  class OpKernel_##id : public C10Kernel {                                   \
+   public:                                                                   \
+    OpKernel_##id(const Node* node, c10::Device device)                      \
+        : C10Kernel(                                                         \
+              node,                                                          \
+              device,                                                        \
+              torch::nativert::OpKernelKind::kNativeStaticDispatchKernel) {} \
+    void computeInternal(torch::nativert::ExecutionFrame& executionFrame)    \
+        const override final {                                               \
+      __VA_ARGS__;                                                           \
+    }                                                                        \
+  };                                                                         \
+  C10_REGISTER_TYPED_CLASS(                                                  \
+      StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
+
+inline at::Tensor create_empty_from(const at::Tensor& t) {
+  return at::detail::empty_cpu(
+      {0},
+      c10::typeMetaToScalarType(t.dtype()),
+      t.layout(),
+      t.device(),
+      std::nullopt,
+      std::nullopt);
+}
+
+inline at::Tensor create_empty_from(
+    const at::Tensor& t,
+    c10::ScalarType dtype) {
+  return at::detail::empty_cpu(
+      {0}, dtype, t.layout(), t.device(), std::nullopt, std::nullopt);
+}
+
+inline at::Tensor create_empty_from(const at::Tensor& t, c10::Device device) {
+  return at::detail::empty_cpu(
+      {0},
+      c10::typeMetaToScalarType(t.dtype()),
+      t.layout(),
+      device,
+      std::nullopt,
+      std::nullopt);
+}
+inline at::Tensor create_empty_from(const at::Tensor& t, c10::Layout layout) {
+  return at::detail::empty_cpu(
+      {0},
+      c10::typeMetaToScalarType(t.dtype()),
+      layout,
+      t.device(),
+      std::nullopt,
+      std::nullopt);
+}
+
+inline at::Tensor create_empty_from(
+    const at::Tensor& t,
+    c10::MemoryFormat memory_format) {
+  return at::detail::empty_cpu(
+      {0},
+      c10::typeMetaToScalarType(t.dtype()),
+      t.layout(),
+      t.device(),
+      std::nullopt,
+      memory_format);
+}
+
+inline at::Tensor create_empty_from(
+    const at::Tensor& t,
+    c10::ScalarType dtype,
+    c10::MemoryFormat memory_format) {
+  return at::detail::empty_cpu(
+      {0}, dtype, t.layout(), t.device(), std::nullopt, memory_format);
+}
+
+} // namespace torch::nativert
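Each of these macros stamps out an OpKernel_<id> subclass of C10Kernel whose computeInternal body is the macro's trailing argument, then registers it under the op's string name in StaticallyDispatchedCPUKernelRegistry. The 1380-line KernelRegistry.cpp diff is suppressed above, so the following is only a sketch of what one registration there might look like, modeled on the REGISTER_NATIVE_CPU_KERNEL uses in NativeKernels.cpp below; the chosen op (aten.abs) and the kernel body are illustrative assumptions, not lines from the suppressed file.

// Hypothetical registration (not taken from the suppressed KernelRegistry.cpp):
// allocate the output lazily with create_empty_from(), then call the ATen
// out-variant so repeated executions can reuse the same output storage.
REGISTER_CPU_KERNEL("torch.ops.aten.abs.default", aten_abs, {
  const auto& self = KernelInput(0).toTensor();
  if (KernelOutput(0).isNone()) {
    KernelOutput(0) = create_empty_from(self);  // empty CPU tensor with self's dtype/layout
  }
  auto& out = KernelOutput(0).toTensor();
  at::abs_out(out, self);  // write the result into the preallocated output
});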
							
								
								
									
torch/nativert/kernels/NativeKernels.cpp  (new file, 113 lines)

@@ -0,0 +1,113 @@
+#include <torch/nativert/kernels/KernelRegistry.h>
+
+#include <ATen/NativeFunctions.h>
+#include <ATen/native/IndexingUtils.h>
+#include <ATen/native/NonSymbolicBC.h>
+
+namespace torch::nativert {
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.slice.Tensor", aten_slice_Tensor, {
+  const auto& self = KernelInput(0).toTensor();
+  const auto& dim = KernelInput(1).toInt();
+  const auto& start = KernelInput(2).toOptional<int64_t>();
+  const auto& end = KernelInput(3).toOptional<int64_t>();
+  const auto& step = KernelInput(4).toInt();
+  KernelOutput(0) = at::native::slice(self, dim, start, end, step);
+});
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.sym_size.int", aten_sym_size_int, {
+  const auto& self = KernelInput(0).toTensor();
+  const auto dim = KernelInput(1).toInt();
+  auto& out = KernelOutput(0);
+  TORCH_CHECK(dim >= 0 && dim < self.dim(), "Invalid dimension");
+  out = self.sym_size(dim);
+});
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.reshape.default", aten_reshape, {
+  const auto& self = KernelInput(0).toTensor();
+  const auto& shape = KernelInput(1).toIntVector();
+  KernelOutput(0) = at::native::reshape(self, shape);
+});
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.view.default", aten_view, {
+  const auto& self = KernelInput(0).toTensor();
+  const auto& size = KernelInput(1).toIntVector();
+  KernelOutput(0) = at::native::view(self, size);
+});
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.permute.default", aten_permute, {
+  const auto& self = KernelInput(0).toTensor();
+  const auto& dims = KernelInput(1).toDimVector();
+  KernelOutput(0) = at::native::permute(self, dims);
+});
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.select.int", aten_select, {
+  const auto& self = KernelInput(0).toTensor();
+  const auto dim = KernelInput(1).toInt();
+  const auto index = KernelInput(2).toInt();
+  KernelOutput(0) = at::native::select(self, dim, index);
+});
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.split.Tensor", aten_split_Tensor, {
+  const auto& self = KernelInput(0).toTensor();
+  const auto split_size = KernelInput(1).toInt();
+  const auto dim = KernelInput(2).toInt();
+  KernelOutput(0) = at::native::split(self, split_size, dim);
+});
+
+REGISTER_NATIVE_CPU_KERNEL(
+    "torch.ops.aten.split_with_sizes.default",
+    aten_split_with_sizes,
+    {
+      const auto& self = KernelInput(0).toTensor();
+      const auto& split_sizes = KernelInput(1).toIntList();
+      const auto dim = KernelInput(2).toInt();
+      KernelOutput(0) =
+          at::native::split_with_sizes(self, split_sizes.vec(), dim);
+    });
+
+REGISTER_NATIVE_CPU_KERNEL(
+    "torch.ops.aten.tensor_split.sections",
+    aten_tensor_split_sections,
+    {
+      const auto& self = KernelInput(0).toTensor();
+      const auto sections = KernelInput(1).toInt();
+      const auto dim = KernelInput(2).toInt();
+      KernelOutput(0) =
+          at::native::tensor_split_sections_symint(self, sections, dim);
+    });
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.item.default", aten_item, {
+  const auto& self = KernelInput(0).toTensor();
+  KernelOutput(0) = at::native::item(self);
+});
+
+REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.narrow.default", aten_narrow, {
+  const auto& self = KernelInput(0).toTensor();
+  const auto dim = KernelInput(1).toInt();
+  int64_t start = 0;
+  if (KernelInput(2).isScalar()) {
+    start = KernelInput(2).toInt();
+  } else {
+    auto& t = KernelInput(2).toTensor();
+    start = t.item<int64_t>();
+  }
+  const auto length = KernelInput(3).toInt();
+  TORCH_CHECK(self.dim() > 0, "narrow() cannot be applied to a 0-dim tensor.");
+  auto cur_size = self.sizes()[dim];
+  if (start != cur_size && start < 0) {
+    start = at::maybe_wrap_dim(start, cur_size);
+  }
+  TORCH_CHECK(
+      length >= 0 && start <= cur_size - length,
+      "start (",
+      start,
+      ") + length (",
+      length,
+      ") exceeds dimension size (",
+      cur_size,
+      ").");
+  KernelOutput(0) = at::native::slice(self, dim, start, start + length, 1);
+});
+
+} // namespace torch::nativert
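On the consuming side, the executor can then ask the registry for a kernel by op name. That call site is not part of this diff; the sketch below assumes the standard c10 registry interface, in which TORCH_DECLARE_REGISTRY generates a StaticallyDispatchedCPUKernelRegistry() accessor whose Create(key, args...) returns a std::unique_ptr<OpKernel>, and assumes node->target() yields the registered op-name string (the helper name itself is made up).

// Sketch only: instantiate a statically dispatched CPU kernel for a graph node.
std::unique_ptr<OpKernel> makeStaticCpuKernel(const Node* node, c10::Device device) {
  return StaticallyDispatchedCPUKernelRegistry()->Create(
      std::string(node->target()), node, device);
}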
torch/nativert/kernels/PrimKernelRegistry.cpp

@@ -57,7 +57,7 @@ class OpKernel_prim_listpack : public OpKernel {
 C10_REGISTER_TYPED_CLASS(
     PrimKernelRegistry,
     "prim.ListPack",
-    OpKernel_prim_listpack);
+    OpKernel_prim_listpack)
 
 REGISTER_PRIM_KERNEL("prim.ListUnpack", prim_listunpack, {
   RECORD_USER_SCOPE("nativert::OpKernel_prim_listunpack");
@@ -114,7 +114,7 @@ class OpKernel_variadic_concat : public OpKernel {
 C10_REGISTER_TYPED_CLASS(
     PrimKernelRegistry,
     "prim.VarConcat",
-    OpKernel_variadic_concat);
+    OpKernel_variadic_concat)
 
 namespace {
 
@@ -158,6 +158,6 @@ class OpKernel_variadic_stack : public OpKernel {
 C10_REGISTER_TYPED_CLASS(
     PrimKernelRegistry,
     "prim.VarStack",
-    OpKernel_variadic_stack);
+    OpKernel_variadic_stack)
 
 } // namespace torch::nativert