mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[nativert] libtorch kernel registry (#157150)
Summary: as titled. Test Plan: CI. Rollback Plan: none. Differential Revision: D77451703. Pull Request resolved: https://github.com/pytorch/pytorch/pull/157150. Approved by: https://github.com/georgiaphillips, https://github.com/henryoier
This commit is contained in:
@ -625,6 +625,10 @@ libtorch_nativert_sources = [
|
||||
"torch/nativert/executor/memory/AliasAnalyzer.cpp",
|
||||
"torch/nativert/executor/memory/LayoutPlanner.cpp",
|
||||
"torch/nativert/executor/memory/LayoutManager.cpp",
|
||||
"torch/nativert/kernels/KernelRegistry.cpp",
|
||||
"torch/nativert/kernels/NativeKernels.cpp",
|
||||
"torch/nativert/kernels/GeneratedStaticDispatchKernels.cpp",
|
||||
"torch/nativert/kernels/GeneratedNativeStaticDispatchKernels.cpp",
|
||||
]
|
||||
|
||||
torch_mobile_tracer_sources = [
|
||||
|
@ -64,6 +64,10 @@ class C10_API SizesAndStrides {
|
||||
storageBytes(size_)));
|
||||
}
|
||||
|
||||
// Inequality, defined as the negation of operator==.
bool operator!=(const SizesAndStrides& other) const {
  const bool equal = (*this == other);
  return !equal;
}
|
||||
|
||||
SizesAndStrides& operator=(const SizesAndStrides& rhs) {
|
||||
if (this == &rhs) {
|
||||
return *this;
|
||||
|
1380
torch/nativert/kernels/KernelRegistry.cpp
Normal file
1380
torch/nativert/kernels/KernelRegistry.cpp
Normal file
File diff suppressed because it is too large
Load Diff
122
torch/nativert/kernels/KernelRegistry.h
Normal file
122
torch/nativert/kernels/KernelRegistry.h
Normal file
@ -0,0 +1,122 @@
|
||||
#pragma once
|
||||
|
||||
#include <torch/nativert/executor/OpKernel.h>
|
||||
#include <torch/nativert/graph/Graph.h>
|
||||
#include <torch/nativert/kernels/PrimKernelRegistry.h>
|
||||
|
||||
namespace torch::nativert {
|
||||
|
||||
// Registry of statically-dispatched CPU kernels, keyed by op name.
// Each registered factory takes the graph Node plus the target device and
// produces an OpKernel (see the REGISTER_*_CPU_KERNEL macros below).
TORCH_DECLARE_REGISTRY(
    StaticallyDispatchedCPUKernelRegistry,
    OpKernel,
    const Node*,
    c10::Device);
|
||||
|
||||
// Defines a statically-dispatched CPU kernel class OpKernel_<id> for op
// `name` and registers it in StaticallyDispatchedCPUKernelRegistry.
// The trailing __VA_ARGS__ becomes the body of computeInternal().
#define REGISTER_CPU_KERNEL(name, id, ...)                                \
  class OpKernel_##id : public C10Kernel {                                \
   public:                                                                \
    OpKernel_##id(const Node* node, c10::Device device)                   \
        : C10Kernel(                                                      \
              node,                                                       \
              device,                                                     \
              torch::nativert::OpKernelKind::kStaticDispatchKernel) {}    \
    void computeInternal(torch::nativert::ExecutionFrame& executionFrame) \
        const override final {                                            \
      __VA_ARGS__;                                                        \
    }                                                                     \
  };                                                                      \
  C10_REGISTER_TYPED_CLASS(                                               \
      StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
|
||||
|
||||
// Pass-through wrapper so an aliasing spec containing commas can be handed
// to REGISTER_ALIASING_CPU_KERNEL as a single macro argument.
#define ALIASING_SPEC(...) __VA_ARGS__
|
||||
|
||||
// Like REGISTER_CPU_KERNEL, but for statically-dispatched CPU kernels whose
// outputs may alias their inputs, as described by `aliasing_spec` (wrap the
// spec in ALIASING_SPEC so embedded commas survive macro expansion).
// Fix: tag these kernels kStaticDispatchKernel, matching REGISTER_CPU_KERNEL.
// The previous kNativeStaticDispatchKernel tag misclassified them — it made
// this macro's kernels indistinguishable in kind from those produced by
// REGISTER_NATIVE_CPU_KERNEL even though they register a C10 static-dispatch
// kernel with an explicit aliasing spec.
#define REGISTER_ALIASING_CPU_KERNEL(name, id, aliasing_spec, ...)        \
  class OpKernel_##id : public C10Kernel {                                \
   public:                                                                \
    OpKernel_##id(const Node* node, c10::Device device)                   \
        : C10Kernel(                                                      \
              node,                                                       \
              device,                                                     \
              torch::nativert::OpKernelKind::kStaticDispatchKernel,       \
              aliasing_spec) {}                                           \
    void computeInternal(torch::nativert::ExecutionFrame& executionFrame) \
        const override final {                                            \
      __VA_ARGS__;                                                        \
    }                                                                     \
  };                                                                      \
  C10_REGISTER_TYPED_CLASS(                                               \
      StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
|
||||
|
||||
// Defines and registers a CPU kernel class OpKernel_<id> for op `name`,
// tagged kNativeStaticDispatchKernel (the kernel bodies in
// NativeKernels.cpp call at::native::* functions directly).
// The trailing __VA_ARGS__ becomes the body of computeInternal().
#define REGISTER_NATIVE_CPU_KERNEL(name, id, ...)                         \
  class OpKernel_##id : public C10Kernel {                                \
   public:                                                                \
    OpKernel_##id(const Node* node, c10::Device device)                   \
        : C10Kernel(                                                      \
              node,                                                       \
              device,                                                     \
              torch::nativert::OpKernelKind::kNativeStaticDispatchKernel) {} \
    void computeInternal(torch::nativert::ExecutionFrame& executionFrame) \
        const override final {                                            \
      __VA_ARGS__;                                                        \
    }                                                                     \
  };                                                                      \
  C10_REGISTER_TYPED_CLASS(                                               \
      StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
|
||||
|
||||
// 0-element CPU tensor inheriting t's dtype, layout, and device; the last
// two empty_cpu arguments are left defaulted (std::nullopt).
inline at::Tensor create_empty_from(const at::Tensor& t) {
  const auto scalar_type = c10::typeMetaToScalarType(t.dtype());
  return at::detail::empty_cpu(
      {0}, scalar_type, t.layout(), t.device(), std::nullopt, std::nullopt);
}
|
||||
|
||||
// 0-element CPU tensor with the given dtype; layout/device copied from t.
inline at::Tensor create_empty_from(
    const at::Tensor& t,
    c10::ScalarType dtype) {
  return at::detail::empty_cpu(
      {0},
      dtype,
      t.layout(),
      t.device(),
      std::nullopt,
      std::nullopt);
}
|
||||
|
||||
// 0-element tensor targeted at `device`; dtype/layout copied from t.
// NOTE(review): this still routes through at::detail::empty_cpu regardless of
// `device` — presumably callers only pass CPU devices here; confirm.
inline at::Tensor create_empty_from(const at::Tensor& t, c10::Device device) {
  const auto scalar_type = c10::typeMetaToScalarType(t.dtype());
  return at::detail::empty_cpu(
      {0}, scalar_type, t.layout(), device, std::nullopt, std::nullopt);
}
|
||||
// 0-element CPU tensor with the given layout; dtype/device copied from t.
inline at::Tensor create_empty_from(const at::Tensor& t, c10::Layout layout) {
  const auto scalar_type = c10::typeMetaToScalarType(t.dtype());
  return at::detail::empty_cpu(
      {0}, scalar_type, layout, t.device(), std::nullopt, std::nullopt);
}
|
||||
|
||||
// 0-element CPU tensor with the given memory format; dtype/layout/device
// copied from t.
inline at::Tensor create_empty_from(
    const at::Tensor& t,
    c10::MemoryFormat memory_format) {
  const auto scalar_type = c10::typeMetaToScalarType(t.dtype());
  return at::detail::empty_cpu(
      {0}, scalar_type, t.layout(), t.device(), std::nullopt, memory_format);
}
|
||||
|
||||
// 0-element CPU tensor with the given dtype and memory format; layout/device
// copied from t.
inline at::Tensor create_empty_from(
    const at::Tensor& t,
    c10::ScalarType dtype,
    c10::MemoryFormat memory_format) {
  return at::detail::empty_cpu(
      {0},
      dtype,
      t.layout(),
      t.device(),
      std::nullopt,
      memory_format);
}
|
||||
|
||||
} // namespace torch::nativert
|
113
torch/nativert/kernels/NativeKernels.cpp
Normal file
113
torch/nativert/kernels/NativeKernels.cpp
Normal file
@ -0,0 +1,113 @@
|
||||
#include <torch/nativert/kernels/KernelRegistry.h>
|
||||
|
||||
#include <ATen/NativeFunctions.h>
|
||||
#include <ATen/native/IndexingUtils.h>
|
||||
#include <ATen/native/NonSymbolicBC.h>
|
||||
|
||||
namespace torch::nativert {
|
||||
|
||||
// aten.slice.Tensor: dispatches straight to at::native::slice with the
// dim/start/end/step taken from the node inputs.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.slice.Tensor", aten_slice_Tensor, {
  const auto& input = KernelInput(0).toTensor();
  const auto slice_dim = KernelInput(1).toInt();
  const auto slice_start = KernelInput(2).toOptional<int64_t>();
  const auto slice_end = KernelInput(3).toOptional<int64_t>();
  const auto slice_step = KernelInput(4).toInt();
  KernelOutput(0) =
      at::native::slice(input, slice_dim, slice_start, slice_end, slice_step);
});
|
||||
|
||||
// aten.sym_size.int: returns the (symbolic) size of `self` along `dim`.
// Fix: accept Python-style negative dims, matching ATen size()/sym_size()
// semantics; the old TORCH_CHECK(dim >= 0 && ...) rejected e.g. dim == -1.
// maybe_wrap_dim both wraps negatives and range-checks, so invalid dims
// still throw.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.sym_size.int", aten_sym_size_int, {
  const auto& self = KernelInput(0).toTensor();
  const auto dim = KernelInput(1).toInt();
  auto& out = KernelOutput(0);
  const auto wrapped_dim = at::maybe_wrap_dim(dim, self.dim());
  out = self.sym_size(wrapped_dim);
});
|
||||
|
||||
// aten.reshape.default: dispatches to at::native::reshape.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.reshape.default", aten_reshape, {
  const auto& input = KernelInput(0).toTensor();
  const auto target_shape = KernelInput(1).toIntVector();
  KernelOutput(0) = at::native::reshape(input, target_shape);
});
|
||||
|
||||
// aten.view.default: dispatches to at::native::view.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.view.default", aten_view, {
  const auto& input = KernelInput(0).toTensor();
  const auto target_size = KernelInput(1).toIntVector();
  KernelOutput(0) = at::native::view(input, target_size);
});
|
||||
|
||||
// aten.permute.default: dispatches to at::native::permute.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.permute.default", aten_permute, {
  const auto& input = KernelInput(0).toTensor();
  const auto perm = KernelInput(1).toDimVector();
  KernelOutput(0) = at::native::permute(input, perm);
});
|
||||
|
||||
// aten.select.int: dispatches to at::native::select.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.select.int", aten_select, {
  const auto& input = KernelInput(0).toTensor();
  const auto select_dim = KernelInput(1).toInt();
  const auto select_index = KernelInput(2).toInt();
  KernelOutput(0) = at::native::select(input, select_dim, select_index);
});
|
||||
|
||||
// aten.split.Tensor: dispatches to at::native::split (fixed chunk size).
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.split.Tensor", aten_split_Tensor, {
  const auto& input = KernelInput(0).toTensor();
  const auto chunk_size = KernelInput(1).toInt();
  const auto split_dim = KernelInput(2).toInt();
  KernelOutput(0) = at::native::split(input, chunk_size, split_dim);
});
|
||||
|
||||
// aten.split_with_sizes.default: dispatches to at::native::split_with_sizes.
REGISTER_NATIVE_CPU_KERNEL(
    "torch.ops.aten.split_with_sizes.default",
    aten_split_with_sizes,
    {
      const auto& input = KernelInput(0).toTensor();
      const auto sizes_list = KernelInput(1).toIntList();
      const auto split_dim = KernelInput(2).toInt();
      // Materialize the c10::List as a std::vector for the ATen call.
      KernelOutput(0) =
          at::native::split_with_sizes(input, sizes_list.vec(), split_dim);
    });
|
||||
|
||||
// aten.tensor_split.sections: dispatches to
// at::native::tensor_split_sections_symint.
REGISTER_NATIVE_CPU_KERNEL(
    "torch.ops.aten.tensor_split.sections",
    aten_tensor_split_sections,
    {
      const auto& input = KernelInput(0).toTensor();
      const auto num_sections = KernelInput(1).toInt();
      const auto split_dim = KernelInput(2).toInt();
      KernelOutput(0) = at::native::tensor_split_sections_symint(
          input, num_sections, split_dim);
    });
|
||||
|
||||
// aten.item.default: dispatches to at::native::item.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.item.default", aten_item, {
  const auto& input = KernelInput(0).toTensor();
  KernelOutput(0) = at::native::item(input);
});
|
||||
|
||||
// aten.narrow.default: returns a length-`length` slice of `self` along
// `dim`, starting at `start` — implemented as slice(dim, start, start+length).
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.narrow.default", aten_narrow, {
  const auto& self = KernelInput(0).toTensor();
  const auto dim = KernelInput(1).toInt();
  // `start` may arrive as either a scalar or a tensor; normalize to int64_t.
  int64_t start = 0;
  if (KernelInput(2).isScalar()) {
    start = KernelInput(2).toInt();
  } else {
    auto& t = KernelInput(2).toTensor();
    start = t.item<int64_t>();
  }
  const auto length = KernelInput(3).toInt();
  TORCH_CHECK(self.dim() > 0, "narrow() cannot be applied to a 0-dim tensor.");
  // NOTE(review): `dim` is used to index sizes() without wrapping —
  // presumably the graph only supplies non-negative dims here; confirm.
  auto cur_size = self.sizes()[dim];
  // Wrap a negative `start` into range (maybe_wrap_dim also range-checks).
  // NOTE(review): the `start != cur_size` conjunct looks redundant — a
  // negative `start` can never equal a non-negative dimension size.
  if (start != cur_size && start < 0) {
    start = at::maybe_wrap_dim(start, cur_size);
  }
  TORCH_CHECK(
      length >= 0 && start <= cur_size - length,
      "start (",
      start,
      ") + length (",
      length,
      ") exceeds dimension size (",
      cur_size,
      ").");
  KernelOutput(0) = at::native::slice(self, dim, start, start + length, 1);
});
|
||||
|
||||
} // namespace torch::nativert
|
@ -57,7 +57,7 @@ class OpKernel_prim_listpack : public OpKernel {
|
||||
C10_REGISTER_TYPED_CLASS(
|
||||
PrimKernelRegistry,
|
||||
"prim.ListPack",
|
||||
OpKernel_prim_listpack);
|
||||
OpKernel_prim_listpack)
|
||||
|
||||
REGISTER_PRIM_KERNEL("prim.ListUnpack", prim_listunpack, {
|
||||
RECORD_USER_SCOPE("nativert::OpKernel_prim_listunpack");
|
||||
@ -114,7 +114,7 @@ class OpKernel_variadic_concat : public OpKernel {
|
||||
C10_REGISTER_TYPED_CLASS(
|
||||
PrimKernelRegistry,
|
||||
"prim.VarConcat",
|
||||
OpKernel_variadic_concat);
|
||||
OpKernel_variadic_concat)
|
||||
|
||||
namespace {
|
||||
|
||||
@ -158,6 +158,6 @@ class OpKernel_variadic_stack : public OpKernel {
|
||||
C10_REGISTER_TYPED_CLASS(
|
||||
PrimKernelRegistry,
|
||||
"prim.VarStack",
|
||||
OpKernel_variadic_stack);
|
||||
OpKernel_variadic_stack)
|
||||
|
||||
} // namespace torch::nativert
|
||||
|
Reference in New Issue
Block a user