[nativert] libtorch kernel registry (#157150)

Summary: as titled — add a statically-dispatched libtorch kernel registry (and native kernels) for nativert.

Test Plan:
ci

Rollback Plan:

Differential Revision: D77451703

Pull Request resolved: https://github.com/pytorch/pytorch/pull/157150
Approved by: https://github.com/georgiaphillips, https://github.com/henryoier
This commit is contained in:
dolpm
2025-07-16 12:36:51 +00:00
committed by PyTorch MergeBot
parent 55d888a616
commit 51a708ffc6
6 changed files with 1626 additions and 3 deletions

View File

@ -625,6 +625,10 @@ libtorch_nativert_sources = [
"torch/nativert/executor/memory/AliasAnalyzer.cpp",
"torch/nativert/executor/memory/LayoutPlanner.cpp",
"torch/nativert/executor/memory/LayoutManager.cpp",
"torch/nativert/kernels/KernelRegistry.cpp",
"torch/nativert/kernels/NativeKernels.cpp",
"torch/nativert/kernels/GeneratedStaticDispatchKernels.cpp",
"torch/nativert/kernels/GeneratedNativeStaticDispatchKernels.cpp",
]
torch_mobile_tracer_sources = [

View File

@ -64,6 +64,10 @@ class C10_API SizesAndStrides {
storageBytes(size_)));
}
// Inequality is defined purely as the negation of operator==, so the two
// comparisons can never disagree.
bool operator!=(const SizesAndStrides& other) const {
  const bool equal = (*this == other);
  return !equal;
}
SizesAndStrides& operator=(const SizesAndStrides& rhs) {
if (this == &rhs) {
return *this;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,122 @@
#pragma once
#include <torch/nativert/executor/OpKernel.h>
#include <torch/nativert/graph/Graph.h>
#include <torch/nativert/kernels/PrimKernelRegistry.h>
namespace torch::nativert {
// Registry of statically-dispatched CPU kernels. Entries are keyed by the
// fully-qualified op name (e.g. "torch.ops.aten.slice.Tensor") and construct
// an OpKernel from a (const Node*, c10::Device) pair. Kernels are added via
// the REGISTER_*_CPU_KERNEL macros below.
TORCH_DECLARE_REGISTRY(
StaticallyDispatchedCPUKernelRegistry,
OpKernel,
const Node*,
c10::Device);
// REGISTER_CPU_KERNEL(name, id, ...) defines a C10Kernel subclass
// OpKernel_<id> whose computeInternal() body is the trailing __VA_ARGS__,
// tags it as OpKernelKind::kStaticDispatchKernel, and registers it under
// `name` in StaticallyDispatchedCPUKernelRegistry.
// NOTE: keep comments outside the #define — a `//` before a trailing
// backslash would swallow the continuation line (splicing precedes comment
// removal).
#define REGISTER_CPU_KERNEL(name, id, ...) \
class OpKernel_##id : public C10Kernel { \
public: \
OpKernel_##id(const Node* node, c10::Device device) \
: C10Kernel( \
node, \
device, \
torch::nativert::OpKernelKind::kStaticDispatchKernel) {} \
void computeInternal(torch::nativert::ExecutionFrame& executionFrame) \
const override final { \
__VA_ARGS__; \
} \
}; \
C10_REGISTER_TYPED_CLASS( \
StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
// ALIASING_SPEC wraps a brace-enclosed aliasing specification so its commas
// survive being passed as a single macro argument.
#define ALIASING_SPEC(...) __VA_ARGS__
// REGISTER_ALIASING_CPU_KERNEL(name, id, aliasing_spec, ...) is the
// aliasing-aware variant of REGISTER_CPU_KERNEL: identical, except it also
// forwards `aliasing_spec` to the C10Kernel constructor.
// Fix: tag these kernels as kStaticDispatchKernel (previously
// kNativeStaticDispatchKernel) so they agree with REGISTER_CPU_KERNEL — both
// register into StaticallyDispatchedCPUKernelRegistry; the native kind is
// reserved for REGISTER_NATIVE_CPU_KERNEL below.
#define REGISTER_ALIASING_CPU_KERNEL(name, id, aliasing_spec, ...)  \
  class OpKernel_##id : public C10Kernel {                          \
   public:                                                          \
    OpKernel_##id(const Node* node, c10::Device device)             \
        : C10Kernel(                                                \
              node,                                                 \
              device,                                               \
              torch::nativert::OpKernelKind::kStaticDispatchKernel, \
              aliasing_spec) {}                                     \
    void computeInternal(torch::nativert::ExecutionFrame& executionFrame) \
        const override final {                                      \
      __VA_ARGS__;                                                  \
    }                                                               \
  };                                                                \
  C10_REGISTER_TYPED_CLASS(                                         \
      StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
// REGISTER_NATIVE_CPU_KERNEL(name, id, ...) mirrors REGISTER_CPU_KERNEL but
// tags the generated kernel as OpKernelKind::kNativeStaticDispatchKernel —
// used for kernels implemented directly against at::native functions.
// NOTE: keep comments outside the #define; `//` before a trailing backslash
// would swallow the continuation line.
#define REGISTER_NATIVE_CPU_KERNEL(name, id, ...) \
class OpKernel_##id : public C10Kernel { \
public: \
OpKernel_##id(const Node* node, c10::Device device) \
: C10Kernel( \
node, \
device, \
torch::nativert::OpKernelKind::kNativeStaticDispatchKernel) {} \
void computeInternal(torch::nativert::ExecutionFrame& executionFrame) \
const override final { \
__VA_ARGS__; \
} \
}; \
C10_REGISTER_TYPED_CLASS( \
StaticallyDispatchedCPUKernelRegistry, name, OpKernel_##id)
// Returns an empty 0-element CPU tensor matching `t`'s dtype, layout, and
// device; kernels use it to materialize an output before resizing.
inline at::Tensor create_empty_from(const at::Tensor& t) {
  const auto scalar_type = c10::typeMetaToScalarType(t.dtype());
  return at::detail::empty_cpu(
      {0}, scalar_type, t.layout(), t.device(), std::nullopt, std::nullopt);
}
// Empty 0-element CPU tensor matching `t`'s layout/device, with an explicit
// dtype override.
inline at::Tensor create_empty_from(
    const at::Tensor& t,
    c10::ScalarType dtype) {
  return at::detail::empty_cpu(
      {0},
      dtype,
      t.layout(),
      t.device(),
      std::nullopt,
      std::nullopt);
}
// Empty 0-element CPU tensor matching `t`'s dtype/layout, with an explicit
// device override.
inline at::Tensor create_empty_from(const at::Tensor& t, c10::Device device) {
  const auto scalar_type = c10::typeMetaToScalarType(t.dtype());
  return at::detail::empty_cpu(
      {0}, scalar_type, t.layout(), device, std::nullopt, std::nullopt);
}
// Empty 0-element CPU tensor matching `t`'s dtype/device, with an explicit
// layout override.
inline at::Tensor create_empty_from(const at::Tensor& t, c10::Layout layout) {
  const auto scalar_type = c10::typeMetaToScalarType(t.dtype());
  return at::detail::empty_cpu(
      {0}, scalar_type, layout, t.device(), std::nullopt, std::nullopt);
}
// Empty 0-element CPU tensor matching `t`'s dtype/layout/device, with an
// explicit memory-format override.
inline at::Tensor create_empty_from(
    const at::Tensor& t,
    c10::MemoryFormat memory_format) {
  const auto scalar_type = c10::typeMetaToScalarType(t.dtype());
  return at::detail::empty_cpu(
      {0}, scalar_type, t.layout(), t.device(), std::nullopt, memory_format);
}
// Empty 0-element CPU tensor matching `t`'s layout/device, with explicit
// dtype and memory-format overrides.
inline at::Tensor create_empty_from(
    const at::Tensor& t,
    c10::ScalarType dtype,
    c10::MemoryFormat memory_format) {
  return at::detail::empty_cpu(
      {0},
      dtype,
      t.layout(),
      t.device(),
      std::nullopt,
      memory_format);
}
} // namespace torch::nativert

View File

@ -0,0 +1,113 @@
#include <torch/nativert/kernels/KernelRegistry.h>
#include <ATen/NativeFunctions.h>
#include <ATen/native/IndexingUtils.h>
#include <ATen/native/NonSymbolicBC.h>
namespace torch::nativert {
// aten.slice.Tensor: produce a view of `self` along `dim` over
// [start, end) with `step`, calling at::native::slice directly to skip the
// dispatcher. start/end are optional (nullopt = from-begin / to-end).
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.slice.Tensor", aten_slice_Tensor, {
const auto& self = KernelInput(0).toTensor();
const auto& dim = KernelInput(1).toInt();
const auto& start = KernelInput(2).toOptional<int64_t>();
const auto& end = KernelInput(3).toOptional<int64_t>();
const auto& step = KernelInput(4).toInt();
KernelOutput(0) = at::native::slice(self, dim, start, end, step);
});
// aten.sym_size.int: return the (symbolic) size of `self` along `dim`.
// Fix: aten.sym_size.int accepts negative dims (e.g. dim = -1), but the
// previous check `dim >= 0 && dim < self.dim()` rejected them. Wrap the dim
// instead; maybe_wrap_dim still raises on a genuinely out-of-range dim, and
// wrap_scalar=false keeps the error for 0-dim tensors.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.sym_size.int", aten_sym_size_int, {
const auto& self = KernelInput(0).toTensor();
const auto dim = KernelInput(1).toInt();
auto& out = KernelOutput(0);
const auto wrapped_dim =
    at::maybe_wrap_dim(dim, self.dim(), /*wrap_scalar=*/false);
out = self.sym_size(wrapped_dim);
});
// aten.reshape.default: reshape `self` to `shape` via at::native::reshape
// (may return a view or a copy, per reshape semantics).
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.reshape.default", aten_reshape, {
const auto& self = KernelInput(0).toTensor();
const auto& shape = KernelInput(1).toIntVector();
KernelOutput(0) = at::native::reshape(self, shape);
});
// aten.view.default: return a view of `self` with the requested `size`;
// at::native::view errors if the tensor is not viewable with that shape.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.view.default", aten_view, {
const auto& self = KernelInput(0).toTensor();
const auto& size = KernelInput(1).toIntVector();
KernelOutput(0) = at::native::view(self, size);
});
// aten.permute.default: return a view of `self` with dimensions reordered
// according to `dims`.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.permute.default", aten_permute, {
const auto& self = KernelInput(0).toTensor();
const auto& dims = KernelInput(1).toDimVector();
KernelOutput(0) = at::native::permute(self, dims);
});
// aten.select.int: return the slice of `self` at position `index` along
// `dim` (rank reduced by one).
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.select.int", aten_select, {
const auto& self = KernelInput(0).toTensor();
const auto dim = KernelInput(1).toInt();
const auto index = KernelInput(2).toInt();
KernelOutput(0) = at::native::select(self, dim, index);
});
// aten.split.Tensor: split `self` into chunks of `split_size` along `dim`;
// the output IValue holds the resulting list of tensors.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.split.Tensor", aten_split_Tensor, {
const auto& self = KernelInput(0).toTensor();
const auto split_size = KernelInput(1).toInt();
const auto dim = KernelInput(2).toInt();
KernelOutput(0) = at::native::split(self, split_size, dim);
});
// aten.split_with_sizes.default: split `self` along `dim` into chunks whose
// lengths are given by `split_sizes`. The list is copied to a vector since
// at::native::split_with_sizes takes an IntArrayRef-compatible container.
REGISTER_NATIVE_CPU_KERNEL(
"torch.ops.aten.split_with_sizes.default",
aten_split_with_sizes,
{
const auto& self = KernelInput(0).toTensor();
const auto& split_sizes = KernelInput(1).toIntList();
const auto dim = KernelInput(2).toInt();
KernelOutput(0) =
at::native::split_with_sizes(self, split_sizes.vec(), dim);
});
// aten.tensor_split.sections: split `self` along `dim` into `sections`
// nearly-equal parts (delegates to the symint-aware native implementation).
REGISTER_NATIVE_CPU_KERNEL(
"torch.ops.aten.tensor_split.sections",
aten_tensor_split_sections,
{
const auto& self = KernelInput(0).toTensor();
const auto sections = KernelInput(1).toInt();
const auto dim = KernelInput(2).toInt();
KernelOutput(0) =
at::native::tensor_split_sections_symint(self, sections, dim);
});
// aten.item.default: extract the single element of `self` as a Scalar
// (errors inside at::native::item if the tensor has more than one element).
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.item.default", aten_item, {
const auto& self = KernelInput(0).toTensor();
KernelOutput(0) = at::native::item(self);
});
// aten.narrow.default: return a length-`length` slice of `self` starting at
// `start` along `dim`, implemented as at::native::slice with step 1.
REGISTER_NATIVE_CPU_KERNEL("torch.ops.aten.narrow.default", aten_narrow, {
const auto& self = KernelInput(0).toTensor();
const auto dim = KernelInput(1).toInt();
// `start` may arrive either as an int scalar or as a 0-dim tensor whose
// single element is the start index.
int64_t start = 0;
if (KernelInput(2).isScalar()) {
start = KernelInput(2).toInt();
} else {
auto& t = KernelInput(2).toTensor();
start = t.item<int64_t>();
}
const auto length = KernelInput(3).toInt();
TORCH_CHECK(self.dim() > 0, "narrow() cannot be applied to a 0-dim tensor.");
// NOTE(review): `dim` is used unwrapped here — a negative dim would index
// sizes() out of range before at::native::slice gets a chance to wrap it;
// presumably callers pass normalized dims — TODO confirm.
auto cur_size = self.sizes()[dim];
// maybe_wrap_dim is reused to wrap a negative `start` into [0, cur_size);
// start == cur_size is allowed and yields an empty slice.
if (start != cur_size && start < 0) {
start = at::maybe_wrap_dim(start, cur_size);
}
TORCH_CHECK(
length >= 0 && start <= cur_size - length,
"start (",
start,
") + length (",
length,
") exceeds dimension size (",
cur_size,
").");
KernelOutput(0) = at::native::slice(self, dim, start, start + length, 1);
});
} // namespace torch::nativert

View File

@ -57,7 +57,7 @@ class OpKernel_prim_listpack : public OpKernel {
C10_REGISTER_TYPED_CLASS(
PrimKernelRegistry,
"prim.ListPack",
OpKernel_prim_listpack);
OpKernel_prim_listpack)
REGISTER_PRIM_KERNEL("prim.ListUnpack", prim_listunpack, {
RECORD_USER_SCOPE("nativert::OpKernel_prim_listunpack");
@ -114,7 +114,7 @@ class OpKernel_variadic_concat : public OpKernel {
C10_REGISTER_TYPED_CLASS(
PrimKernelRegistry,
"prim.VarConcat",
OpKernel_variadic_concat);
OpKernel_variadic_concat)
namespace {
@ -158,6 +158,6 @@ class OpKernel_variadic_stack : public OpKernel {
C10_REGISTER_TYPED_CLASS(
PrimKernelRegistry,
"prim.VarStack",
OpKernel_variadic_stack);
OpKernel_variadic_stack)
} // namespace torch::nativert