Revert "[BE] Cleanup old ExecuTorch codegen and runtime code (#154165)"

This reverts commit 515c19a3856e953c0fe23a0ed4fa844f8eea34d8.

Reverted https://github.com/pytorch/pytorch/pull/154165 on behalf of https://github.com/seemethere due to This is failing when attempting to test against executorch main internally, author has acknowledged that this should be reverted ([comment](https://github.com/pytorch/pytorch/pull/154165#issuecomment-2931489616))
PyTorch MergeBot
2025-06-02 16:28:46 +00:00
parent 981bdb39ca
commit 67067512a1
43 changed files with 5091 additions and 1 deletion

@@ -1 +1 @@
22e7dbd922fbc3f2ae6e97be66e2329fab978619
b173722085b3f555d6ba4533d6bbaddfd7c71144

@@ -52,6 +52,12 @@ fi
export USE_LLVM=/opt/llvm
export LLVM_DIR=/opt/llvm/lib/cmake/llvm
if [[ "$BUILD_ENVIRONMENT" == *executorch* ]]; then
# To build test_edge_op_registration
export BUILD_EXECUTORCH=ON
export USE_CUDA=0
fi
if ! which conda; then
# In ROCm CIs, we are doing cross compilation on build machines with
# intel cpu and later run tests on machines with amd cpu.

@@ -526,6 +526,7 @@ if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND)
endif()
option(TRACING_BASED
"Master flag to build Lite Interpreter with tracing build option" OFF)
option(BUILD_EXECUTORCH "Master flag to build Executorch" ON)
# This is a fix for a rare build issue on Ubuntu: symbol lookup error:
# miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol:
# mkl_blas_dsyrk

@@ -1319,6 +1319,12 @@ install(FILES
"${TORCH_SRC_DIR}/custom_class_detail.h"
DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch)
if(BUILD_TEST)
if(BUILD_EXECUTORCH)
add_subdirectory(
${TORCH_ROOT}/test/edge
${CMAKE_BINARY_DIR}/test_edge_op_registration
)
endif()
if(BUILD_LITE_INTERPRETER)
add_subdirectory(
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime

@@ -272,6 +272,10 @@ select = [
"F401",
"F403",
]
"torchgen/executorch/api/types/__init__.py" = [
"F401",
"F403",
]
"torch/utils/collect_env.py" = [
"UP", # collect_env.py needs to work with older versions of Python
]

@@ -699,6 +699,8 @@ class build_ext(setuptools.command.build_ext.build_ext):
)
if cmake_cache_vars["USE_LIGHTWEIGHT_DISPATCH"]:
report("-- Using lightweight dispatch")
if cmake_cache_vars["BUILD_EXECUTORCH"]:
report("-- Building Executorch")
if cmake_cache_vars["USE_ITT"]:
report("-- Using ITT")

test/edge/CMakeLists.txt Normal file
@@ -0,0 +1,74 @@
cmake_minimum_required(VERSION 3.15)
set(TORCH_ROOT ${CMAKE_CURRENT_LIST_DIR}/../..)
set(TEST_ROOT ${TORCH_ROOT}/test/edge)
set(OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/out)
file(GLOB_RECURSE all_python "${TORCH_ROOT}/torchgen/*.py")
include(${TORCH_ROOT}/cmake/public/utils.cmake)
append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS)
# Generate unboxing kernels
set(GEN_COMMAND
Python::Interpreter -m torchgen.gen_executorch
--source-path=${TEST_ROOT}
--install-dir=${OUTPUT_DIRECTORY}
--tags-path=${TORCH_ROOT}/aten/src/ATen/native/tags.yaml
--aten-yaml-path=${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml
--use-aten-lib
--op-selection-yaml-path=${TEST_ROOT}/selected_operators.yaml
--custom-ops-yaml-path=${TEST_ROOT}/custom_ops.yaml
)
set(GEN_COMMAND_sources
${OUTPUT_DIRECTORY}/RegisterCodegenUnboxedKernelsEverything.cpp
${OUTPUT_DIRECTORY}/RegisterCPUCustomOps.cpp
${OUTPUT_DIRECTORY}/Functions.h
${OUTPUT_DIRECTORY}/NativeFunctions.h
${OUTPUT_DIRECTORY}/CustomOpsNativeFunctions.h
)
message(STATUS "Generating sources for unboxing kernels ${GEN_COMMAND}")
add_custom_command(
COMMENT "Generating sources"
OUTPUT ${GEN_COMMAND_sources}
COMMAND ${GEN_COMMAND}
DEPENDS
${all_python}
${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml
${TORCH_ROOT}/aten/src/ATen/native/tags.yaml
${TEST_ROOT}/templates/Functions.h
${TEST_ROOT}/templates/NativeFunctions.h
${TEST_ROOT}/templates/RegisterCodegenUnboxedKernels.cpp
${TEST_ROOT}/templates/RegisterDispatchKeyCustomOps.cpp
WORKING_DIRECTORY ${TORCH_ROOT}
)
add_custom_target(unbox_target DEPENDS ${GEN_COMMAND_sources})
add_library(unbox_lib STATIC
${GEN_COMMAND_sources}
${TEST_ROOT}/operator_registry.cpp
${TEST_ROOT}/custom_ops.cpp
)
target_include_directories(unbox_lib PUBLIC ${TEST_ROOT} ${ATen_CPU_INCLUDE})
target_link_libraries(unbox_lib PUBLIC torch_cpu)
target_compile_definitions(unbox_lib PUBLIC USE_ATEN_LIB)
add_executable(test_edge_op_registration
${TEST_ROOT}/test_operator_registration.cpp
${TEST_ROOT}/test_main.cpp
)
target_compile_definitions(test_edge_op_registration PRIVATE USE_GTEST)
target_link_libraries(test_edge_op_registration PRIVATE gtest_main unbox_lib)
if((CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") OR (APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
target_link_options(test_edge_op_registration PRIVATE
"-Wl,-force_load,$<TARGET_FILE:unbox_lib>"
)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
target_link_options(test_edge_op_registration PRIVATE
"-Wl,--whole-archive,$<TARGET_FILE:unbox_lib>,--no-whole-archive"
)
endif()
if(INSTALL_TEST)
set_target_properties(test_edge_op_registration PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS test_edge_op_registration DESTINATION bin)
endif()

test/edge/Evalue.h Normal file
@@ -0,0 +1,479 @@
#pragma once
#include <ATen/ATen.h>
/**
* WARNING: EValue is a class used by ExecuTorch for its boxed operators. It
* contains logic similar to `IValue` in PyTorch, providing APIs to convert
* boxed values to unboxed values.
*
* It mirrors an fbcode-internal source file,
* [`EValue.h`](https://www.internalfb.com/code/fbsource/xplat/executorch/core/values/Evalue.h).
*
* We mirror this class to make sure the torchgen logic has CI coverage, given
* that torchgen is used for both ExecuTorch and PyTorch.
*
* If any of the logic here needs to change, please update the fbcode version
* of `Evalue.h` as well. The two versions will be merged as soon as ExecuTorch
* is in OSS (hopefully by Q2 2023).
*/
namespace torch {
namespace executor {
#define ET_CHECK_MSG TORCH_CHECK_MSG
#define EXECUTORCH_FORALL_TAGS(_) \
_(None) \
_(Tensor) \
_(String) \
_(Double) \
_(Int) \
_(Bool) \
_(ListBool) \
_(ListDouble) \
_(ListInt) \
_(ListTensor) \
_(ListScalar) \
_(ListOptionalTensor)
enum class Tag : uint32_t {
#define DEFINE_TAG(x) x,
EXECUTORCH_FORALL_TAGS(DEFINE_TAG)
#undef DEFINE_TAG
};
struct EValue;
template <typename T>
struct evalue_to_const_ref_overload_return {
using type = T;
};
template <>
struct evalue_to_const_ref_overload_return<at::Tensor> {
using type = const at::Tensor&;
};
template <typename T>
struct evalue_to_ref_overload_return {
using type = T;
};
template <>
struct evalue_to_ref_overload_return<at::Tensor> {
using type = at::Tensor&;
};
/*
* Helper class used to correlate EValues in the executor table, with the
* unwrapped list of the proper type. Because values in the runtime's values
* table can change during execution, we cannot statically allocate list of
* objects at deserialization. Imagine the serialized list says index 0 in the
* value table is element 2 in the list, but during execution the value in
* element 2 changes (in the case of tensor this means the TensorImpl* stored in
* the tensor changes). To solve this instead they must be created dynamically
* whenever they are used.
*/
template <typename T>
class EValObjectList {
public:
EValObjectList() = default;
/*
* `wrapped_vals` is a list of pointers into the runtime's values table whose
* destinations correlate with the elements of the list; `unwrapped_vals` is a
* container of the same size that serves as memory for constructing the
* unwrapped values.
*/
EValObjectList(EValue** wrapped_vals, T* unwrapped_vals, int size)
: wrapped_vals_(wrapped_vals, size), unwrapped_vals_(unwrapped_vals) {}
/*
* Constructs and returns the list of T specified by the EValue pointers
*/
at::ArrayRef<T> get() const;
private:
// Source of truth for the list
at::ArrayRef<EValue*> wrapped_vals_;
// Same size as wrapped_vals
mutable T* unwrapped_vals_;
};
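// Illustrative usage (editor's sketch, not part of the original file):
//   EValue* slots[2] = {&values[3], &values[7]}; // pointers into the values table
//   at::Tensor scratch[2];                       // backing memory for unwrapped values
//   EValObjectList<at::Tensor> list(slots, scratch, /*size=*/2);
//   at::ArrayRef<at::Tensor> view = list.get();  // re-reads the table on each call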
// Aggregate typing system similar to IValue, only slimmed down with less
// functionality, no dependency on <atomic>, and fewer supported types to
// better suit embedded systems (i.e. no intrusive_ptr).
struct EValue {
union Payload {
// When in ATen mode at::Tensor is not trivially copyable, this nested union
// lets us handle tensor as a special case while leaving the rest of the
// fields in a simple state instead of requiring a switch on tag everywhere.
union TriviallyCopyablePayload {
TriviallyCopyablePayload() : as_int(0) {}
// Scalar supported through these 3 types
int64_t as_int;
double as_double;
bool as_bool;
// TODO(jakeszwe): convert back to pointers to optimize size of this
// struct
at::ArrayRef<char> as_string;
at::ArrayRef<int64_t> as_int_list;
at::ArrayRef<double> as_double_list;
at::ArrayRef<bool> as_bool_list;
EValObjectList<at::Tensor> as_tensor_list;
EValObjectList<std::optional<at::Tensor>> as_list_optional_tensor;
} copyable_union;
// Since a Tensor just holds a TensorImpl*, there's no value to use Tensor*
// here.
at::Tensor as_tensor;
Payload() {}
~Payload() {}
};
// Data storage and type tag
Payload payload;
Tag tag;
// Basic ctors and assignments
EValue(const EValue& rhs) : EValue(rhs.payload, rhs.tag) {}
EValue(EValue&& rhs) noexcept : tag(rhs.tag) {
moveFrom(std::move(rhs));
}
EValue& operator=(EValue&& rhs) & noexcept {
if (&rhs == this) {
return *this;
}
destroy();
moveFrom(std::move(rhs));
return *this;
}
EValue& operator=(EValue const& rhs) & {
// Define copy assignment through copy ctor and move assignment
*this = EValue(rhs);
return *this;
}
~EValue() {
destroy();
}
/****** None Type ******/
EValue() : tag(Tag::None) {
payload.copyable_union.as_int = 0;
}
bool isNone() const {
return tag == Tag::None;
}
/****** Int Type ******/
/*implicit*/ EValue(int64_t i) : tag(Tag::Int) {
payload.copyable_union.as_int = i;
}
bool isInt() const {
return tag == Tag::Int;
}
int64_t toInt() const {
ET_CHECK_MSG(isInt(), "EValue is not an int.");
return payload.copyable_union.as_int;
}
/****** Double Type ******/
/*implicit*/ EValue(double d) : tag(Tag::Double) {
payload.copyable_union.as_double = d;
}
bool isDouble() const {
return tag == Tag::Double;
}
double toDouble() const {
ET_CHECK_MSG(isDouble(), "EValue is not a Double.");
return payload.copyable_union.as_double;
}
/****** Bool Type ******/
/*implicit*/ EValue(bool b) : tag(Tag::Bool) {
payload.copyable_union.as_bool = b;
}
bool isBool() const {
return tag == Tag::Bool;
}
bool toBool() const {
ET_CHECK_MSG(isBool(), "EValue is not a Bool.");
return payload.copyable_union.as_bool;
}
/****** Scalar Type ******/
/// Construct an EValue using the implicit value of a Scalar.
/*implicit*/ EValue(at::Scalar s) {
if (s.isIntegral(false)) {
tag = Tag::Int;
payload.copyable_union.as_int = s.to<int64_t>();
} else if (s.isFloatingPoint()) {
tag = Tag::Double;
payload.copyable_union.as_double = s.to<double>();
} else if (s.isBoolean()) {
tag = Tag::Bool;
payload.copyable_union.as_bool = s.to<bool>();
} else {
ET_CHECK_MSG(false, "Scalar passed to EValue is not initialized.");
}
}
bool isScalar() const {
return tag == Tag::Int || tag == Tag::Double || tag == Tag::Bool;
}
at::Scalar toScalar() const {
// Convert from implicit value to Scalar using implicit constructors.
if (isDouble()) {
return toDouble();
} else if (isInt()) {
return toInt();
} else if (isBool()) {
return toBool();
} else {
ET_CHECK_MSG(false, "EValue is not a Scalar.");
return c10::Scalar();
}
}
/****** Tensor Type ******/
/*implicit*/ EValue(at::Tensor t) : tag(Tag::Tensor) {
// When built in ATen mode, at::Tensor has a non-trivial constructor and
// destructor, so regular assignment to a union field is UB. Instead we must
// go through placement new (which causes a refcount bump).
new (&payload.as_tensor) at::Tensor(t);
}
bool isTensor() const {
return tag == Tag::Tensor;
}
at::Tensor toTensor() && {
ET_CHECK_MSG(isTensor(), "EValue is not a Tensor.");
return std::move(payload.as_tensor);
}
at::Tensor& toTensor() & {
ET_CHECK_MSG(isTensor(), "EValue is not a Tensor.");
return payload.as_tensor;
}
const at::Tensor& toTensor() const& {
ET_CHECK_MSG(isTensor(), "EValue is not a Tensor.");
return payload.as_tensor;
}
/****** String Type ******/
/*implicit*/ EValue(const char* s, size_t size) : tag(Tag::String) {
payload.copyable_union.as_string = at::ArrayRef<char>(s, size);
}
bool isString() const {
return tag == Tag::String;
}
std::string_view toString() const {
ET_CHECK_MSG(isString(), "EValue is not a String.");
return std::string_view(
payload.copyable_union.as_string.data(),
payload.copyable_union.as_string.size());
}
/****** Int List Type ******/
/*implicit*/ EValue(at::ArrayRef<int64_t> i) : tag(Tag::ListInt) {
payload.copyable_union.as_int_list = i;
}
bool isIntList() const {
return tag == Tag::ListInt;
}
at::ArrayRef<int64_t> toIntList() const {
ET_CHECK_MSG(isIntList(), "EValue is not an Int List.");
return payload.copyable_union.as_int_list;
}
/****** Bool List Type ******/
/*implicit*/ EValue(at::ArrayRef<bool> b) : tag(Tag::ListBool) {
payload.copyable_union.as_bool_list = b;
}
bool isBoolList() const {
return tag == Tag::ListBool;
}
at::ArrayRef<bool> toBoolList() const {
ET_CHECK_MSG(isBoolList(), "EValue is not a Bool List.");
return payload.copyable_union.as_bool_list;
}
/****** Double List Type ******/
/*implicit*/ EValue(at::ArrayRef<double> d) : tag(Tag::ListDouble) {
payload.copyable_union.as_double_list = d;
}
bool isDoubleList() const {
return tag == Tag::ListDouble;
}
at::ArrayRef<double> toDoubleList() const {
ET_CHECK_MSG(isDoubleList(), "EValue is not a Double List.");
return payload.copyable_union.as_double_list;
}
/****** Tensor List Type ******/
/*implicit*/ EValue(EValObjectList<at::Tensor> t) : tag(Tag::ListTensor) {
payload.copyable_union.as_tensor_list = t;
}
bool isTensorList() const {
return tag == Tag::ListTensor;
}
at::ArrayRef<at::Tensor> toTensorList() const {
ET_CHECK_MSG(isTensorList(), "EValue is not a Tensor List.");
return payload.copyable_union.as_tensor_list.get();
}
/****** List Optional Tensor Type ******/
/*implicit*/ EValue(EValObjectList<std::optional<at::Tensor>> t)
: tag(Tag::ListOptionalTensor) {
payload.copyable_union.as_list_optional_tensor = t;
}
bool isListOptionalTensor() const {
return tag == Tag::ListOptionalTensor;
}
at::ArrayRef<std::optional<at::Tensor>> toListOptionalTensor() {
return payload.copyable_union.as_list_optional_tensor.get();
}
/****** ScalarType Type ******/
at::ScalarType toScalarType() const {
ET_CHECK_MSG(isInt(), "EValue is not a ScalarType.");
return static_cast<at::ScalarType>(payload.copyable_union.as_int);
}
/****** MemoryFormat Type ******/
at::MemoryFormat toMemoryFormat() const {
ET_CHECK_MSG(isInt(), "EValue is not a MemoryFormat.");
return static_cast<at::MemoryFormat>(payload.copyable_union.as_int);
}
template <typename T>
T to() &&;
template <typename T>
typename evalue_to_ref_overload_return<T>::type to() &;
/**
* Converts the EValue to an optional object that can represent both T and
* an uninitialized state.
*/
template <typename T>
inline std::optional<T> toOptional() {
if (this->isNone()) {
return std::nullopt;
}
return this->to<T>();
}
private:
// Precondition: the payload value has had its destructor called.
void clearToNone() noexcept {
payload.copyable_union.as_int = 0;
tag = Tag::None;
}
// Shared move logic
void moveFrom(EValue&& rhs) noexcept {
if (rhs.isTensor()) {
new (&payload.as_tensor) at::Tensor(std::move(rhs.payload.as_tensor));
rhs.payload.as_tensor.~Tensor();
} else {
payload.copyable_union = rhs.payload.copyable_union;
}
tag = rhs.tag;
rhs.clearToNone();
}
// Destructs stored tensor if there is one
void destroy() {
// Necessary for an ATen tensor to decrement the refcount of the intrusive_ptr
// to its TensorImpl, which got a refcount increment when we placed it in the
// EValue; a no-op for an ExecuTorch tensor. An #ifdef here could buy a minor
// performance bump at a code-maintainability cost.
if (isTensor()) {
payload.as_tensor.~Tensor();
} else if (isTensorList()) {
for (auto& tensor : toTensorList()) {
tensor.~Tensor();
}
} else if (isListOptionalTensor()) {
for (auto& optional_tensor : toListOptionalTensor()) {
optional_tensor.~optional();
}
}
}
EValue(const Payload& p, Tag t) : tag(t) {
if (isTensor()) {
new (&payload.as_tensor) at::Tensor(p.as_tensor);
} else {
payload.copyable_union = p.copyable_union;
}
}
};
#define EVALUE_DEFINE_TO(T, method_name) \
template <> \
inline evalue_to_ref_overload_return<T>::type EValue::to<T>()& { \
return static_cast<T>(this->method_name()); \
}
template <>
inline at::Tensor& EValue::to<at::Tensor>() & {
return this->toTensor();
}
EVALUE_DEFINE_TO(at::Scalar, toScalar)
EVALUE_DEFINE_TO(int64_t, toInt)
EVALUE_DEFINE_TO(bool, toBool)
EVALUE_DEFINE_TO(double, toDouble)
EVALUE_DEFINE_TO(std::string_view, toString)
EVALUE_DEFINE_TO(at::ScalarType, toScalarType)
EVALUE_DEFINE_TO(at::MemoryFormat, toMemoryFormat)
EVALUE_DEFINE_TO(std::optional<at::Tensor>, toOptional<at::Tensor>)
EVALUE_DEFINE_TO(at::ArrayRef<int64_t>, toIntList)
EVALUE_DEFINE_TO(
std::optional<at::ArrayRef<int64_t>>,
toOptional<at::ArrayRef<int64_t>>)
EVALUE_DEFINE_TO(
std::optional<at::ArrayRef<double>>,
toOptional<at::ArrayRef<double>>)
EVALUE_DEFINE_TO(at::ArrayRef<std::optional<at::Tensor>>, toListOptionalTensor)
EVALUE_DEFINE_TO(at::ArrayRef<double>, toDoubleList)
#undef EVALUE_DEFINE_TO
template <typename T>
at::ArrayRef<T> EValObjectList<T>::get() const {
for (size_t i = 0; i < wrapped_vals_.size(); i++) {
unwrapped_vals_[i] = wrapped_vals_[i]->template to<T>();
}
return at::ArrayRef<T>{unwrapped_vals_, wrapped_vals_.size()};
}
} // namespace executor
} // namespace torch
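Editor's sketch (not part of the diff): a minimal illustration of the boxed/unboxed API above, assuming only the Evalue.h header shown here.

#include "Evalue.h"

void evalue_round_trip() {
  using torch::executor::EValue;
  EValue boxed(int64_t{42});            // tagged Tag::Int
  int64_t unboxed = boxed.toInt();      // checked accessor, asserts the tag
  EValue t(at::ones({2, 2}));           // placement-new into the union payload
  at::Tensor& ref = t.to<at::Tensor>(); // to<T>() returns a reference for Tensor
  (void)unboxed;
  (void)ref;
}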

test/edge/custom_ops.cpp Normal file
@@ -0,0 +1,10 @@
#include <ATen/Tensor.h>
namespace custom {
namespace native {
at::Tensor& add_3_out(const at::Tensor& a, const at::Tensor& b, const at::Tensor& c, at::Tensor& out) {
out = a.add(b).add(c);
return out;
}
}
}

@@ -0,0 +1,4 @@
- func: custom::add_3.out(Tensor a, Tensor b, Tensor c, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: custom::add_3_out

test/edge/event_tracer.h Normal file
@@ -0,0 +1,33 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <stdlib.h>
#include <cstdint>
namespace torch {
namespace executor {
typedef uint32_t AllocatorID;
typedef int32_t ChainID;
typedef uint32_t DebugHandle;
/**
* EventTracer is a class that users can inherit and implement to
* log/serialize/stream etc. the profiling and debugging events that are
* generated at runtime for a model. An example of this is the ETDump
* implementation in the SDK codebase that serializes these events to a
* flatbuffer.
*/
class EventTracer {};
struct EventTracerEntry {};
} // namespace executor
} // namespace torch

@@ -0,0 +1,91 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <event_tracer.h>
/**
* @file
*
* This file contains the hooks that are inserted across various parts of the
* core runtime code to call into the EventTracer class for logging of profiling
* and debugging events. Any calls made to the EventTracer from the runtime must
* be made via these hooks.
* Users shouldn't add these hooks directly in their code; they are meant only
* for use in ExecuTorch internal code.
*
* The benefit of defining these hooks is that we can easily control whether or
* not we want to compile in the EventTracer code based on the status of the
* ET_EVENT_TRACER_ENABLED flag.
*/
namespace torch {
namespace executor {
namespace internal {
/**
* This class enables scope based profiling where needed using RAII.
* Profiling will be started when the object is created and will end
* when the object goes out of scope.
*/
class EventTracerProfileScope final {
public:
EventTracerProfileScope(EventTracer* event_tracer, const char* name) {};
~EventTracerProfileScope() {};
private:
EventTracer* event_tracer_;
EventTracerEntry event_entry_;
};
/**
* This class enables scope based profiling where needed using RAII.
* Profiling will be started when the object is created and will end
* when the object goes out of scope.
*/
class EventTracerProfileOpScope final {
public:
EventTracerProfileOpScope(EventTracer* event_tracer, const char* name) {};
~EventTracerProfileOpScope() {};
private:
EventTracer* event_tracer_;
EventTracerEntry event_entry_;
};
/**
* This class helps us set and then clear out the chain id and debug handle
* values stored in the event tracer class using RAII. This is typically called
* in the executor loop before entering the codegen layer to configure the chain
* id and debug handle of the current instruction being executed.
* After we return from the kernel execution we can then reset the chain id and
* debug handle to defaults when this object goes out of scope.
*/
class EventTracerProfileInstructionScope final {
public:
EventTracerProfileInstructionScope(
EventTracer* event_tracer,
ChainID chain_idx,
DebugHandle debug_handle) {};
~EventTracerProfileInstructionScope() {};
private:
EventTracer* event_tracer_;
};
inline void event_tracer_log_evalue(EventTracer* event_tracer, EValue& evalue) {
(void)event_tracer;
(void)evalue;
}
} // namespace internal
} // namespace executor
} // namespace torch
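The generated unboxing wrappers are the intended call sites for these hooks; the torchgen test later in this diff expects exactly this bracketing. A hedged sketch (operator and wrapper names are illustrative, and the KernelRuntimeContext/EValue headers from this diff are assumed to be included):

void call_demo_kernel(torch::executor::KernelRuntimeContext& context,
                      torch::executor::EValue** stack) {
  torch::executor::internal::EventTracerProfileOpScope op_scope(
      context.internal_event_tracer(), "native_call_demo");
  // ... invoke the kernel, writing its result into *stack[0] ...
  torch::executor::internal::event_tracer_log_evalue(
      context.internal_event_tracer(), *stack[0]);
}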

@@ -0,0 +1,44 @@
#pragma once
#include "event_tracer.h"
namespace torch {
namespace executor {
/**
* Bucket type abstraction that contains many elements of runtime state that
* a kernel author may want available, but would otherwise be unable to access.
*
* Forwarded along to all operators when running in lean mode. NOTE: Will not be
* forwarded to operators if running in ATen mode as those operators do not
* expect to receive a KernelRuntimeContext and would not use it.
*
* This includes things like setting an error state, a scratch allocator for
* operators that need more than constant space, and a TensorResizer for
* dynamic-shape tensors, allowing programs to be more flexible with Tensor shape.
*/
class KernelRuntimeContext {
public:
/**
* Construct a new kernel runtime context along with an optional event tracer.
*/
KernelRuntimeContext(EventTracer* event_tracer = nullptr)
: event_tracer_(event_tracer) {}
/**
* INTERNAL ONLY
*
* Returns a pointer to an instance of EventTracer to do profiling/debugging
* logging inside the codegen layer. This is only for internal usage inside
* the codegen layer and users should not be accessing this.
*/
EventTracer* internal_event_tracer() {
return event_tracer_;
}
private:
EventTracer* event_tracer_;
};
} // namespace executor
} // namespace torch
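Because the tracer argument defaults to nullptr, a context can be default-constructed, as the registration tests below do, or handed a tracer explicitly; a quick editor's sketch:

torch::executor::KernelRuntimeContext plain_ctx;            // no tracer attached
torch::executor::EventTracer tracer;
torch::executor::KernelRuntimeContext traced_ctx(&tracer);  // tracer reachable from codegen hooks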

@@ -0,0 +1,45 @@
#include <c10/util/Exception.h>
#include <operator_registry.h>
namespace torch {
namespace executor {
KernelRegistry& getKernelRegistry() {
static KernelRegistry kernel_registry;
return kernel_registry;
}
bool register_kernels(const ArrayRef<Kernel>& kernels) {
return getKernelRegistry().register_kernels(kernels);
}
bool KernelRegistry::register_kernels(
const ArrayRef<Kernel>& kernels) {
for (const auto& kernel : kernels) {
this->kernels_map_[kernel.name_] = kernel.kernel_;
}
return true;
}
bool hasKernelFn(const char* name) {
return getKernelRegistry().hasKernelFn(name);
}
bool KernelRegistry::hasKernelFn(const char* name) {
auto kernel = this->kernels_map_.find(name);
return kernel != this->kernels_map_.end();
}
KernelFunction& getKernelFn(const char* name) {
return getKernelRegistry().getKernelFn(name);
}
KernelFunction& KernelRegistry::getKernelFn(const char* name) {
auto kernel = this->kernels_map_.find(name);
TORCH_CHECK_MSG(kernel != this->kernels_map_.end(), "Kernel not found!");
return kernel->second;
}
} // namespace executor
} // namespace torch

@@ -0,0 +1,72 @@
#pragma once
#include <cstring>
#include <functional>
#include <map>
#include "Evalue.h"
#include "kernel_runtime_context.h"
#include <c10/util/ArrayRef.h>
namespace torch {
namespace executor {
using KernelFunction = std::function<void(KernelRuntimeContext&, EValue**)>;
template<typename T>
using ArrayRef = at::ArrayRef<T>;
#define EXECUTORCH_SCOPE_PROF(x)
struct Kernel {
const char* name_;
KernelFunction kernel_;
Kernel() = default;
/**
* We copy the string pointer instead of duplicating the string itself, so we
* require the lifetime of the kernel name to be at least as long as the
* kernel registry.
*/
explicit Kernel(const char* name, KernelFunction func)
: name_(name), kernel_(func) {}
};
/**
* See KernelRegistry::hasKernelFn()
*/
bool hasKernelFn(const char* name);
/**
* See KernelRegistry::getKernelFn()
*/
KernelFunction& getKernelFn(const char* name);
[[nodiscard]] bool register_kernels(const ArrayRef<Kernel>&);
struct KernelRegistry {
public:
KernelRegistry() : kernelRegSize_(0) {}
bool register_kernels(const ArrayRef<Kernel>&);
/**
* Checks whether a kernel with a given name is registered.
*/
bool hasKernelFn(const char* name);
/**
* Returns the kernel function registered under the given name; fails a TORCH_CHECK if none is found.
*/
KernelFunction& getKernelFn(const char* name);
private:
std::map<const char*, KernelFunction> kernels_map_;
uint32_t kernelRegSize_;
};
} // namespace executor
} // namespace torch
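Tying the pieces together: a kernel is registered by handing a static array of Kernel entries to register_kernels() during static initialization, which is what the generated RegisterCodegenUnboxedKernelsEverything.cpp does. A minimal hand-written editor's sketch with a made-up operator name:

static torch::executor::Kernel demo_kernels[] = {
    torch::executor::Kernel(
        "demo::noop.out",
        [](torch::executor::KernelRuntimeContext&, torch::executor::EValue**) {}),
};
// Assignment kept only to force registration at static-initialization time.
static bool demo_registered = torch::executor::register_kernels(
    torch::executor::ArrayRef<torch::executor::Kernel>(demo_kernels, 1));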

@@ -0,0 +1,463 @@
build_features: []
custom_classes: []
include_all_non_op_selectives: false
include_all_operators: false
kernel_metadata: {}
et_kernel_metadata:
custom::add_3.out:
- v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3
- v1/3;0,1,2,3|3;0,1,2,3|3;0,1,2,3
aten::add.out:
- v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3
- v1/3;0,1,2,3|3;0,1,2,3|3;0,1,2,3
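# Editor's note: each kernel key above reads "v<version>/<dtype>;<dim order>" per
# tensor argument, '|'-separated (6 = Float and 3 = Int in c10::ScalarType, with
# contiguous dim order 0,1,2,3); see ETKernelKey.gen_from_yaml in the torchgen
# tests later in this diff.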
operators:
aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::_reshape_alias_copy.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::_softmax.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::_to_copy.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::_unique2.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::add.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::addmm.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::avg_pool2d.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::baddbmm.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::bitwise_and.Tensor_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::bmm.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::cat.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::clamp.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::clone.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::constant_pad_nd.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::conv1d.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::convolution.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::cumsum.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::detach_copy.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::div.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::embedding.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::eq.Scalar_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::eq.Tensor_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::exp.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::expand_copy.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::floor_divide.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::gelu.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::grid_sampler_2d.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::gt.Scalar_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::index.Tensor_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::index_put.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::index_select.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::leaky_relu.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::linalg_inv_ex.inverse:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::logit.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::masked_fill.Scalar_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::max.unary_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::max_pool2d_with_indices.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::mean.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::minimum.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::mm.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::mul.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::native_batch_norm.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::native_layer_norm.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::ne.Scalar_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::nonzero.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::permute_copy.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::pixel_shuffle.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::relu.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::remainder.Scalar_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::repeat.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::round.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::rsub.Scalar_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::select_copy.int_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::sigmoid.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::slice_copy.Tensor_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::softplus.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::sort.values:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::split_copy.Tensor_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::split_with_sizes_copy.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::stack.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::sub.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::sum.IntList_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::tanh.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::topk.values:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::transpose_copy.int_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::unbind_copy.int_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::unsafe_split.Tensor_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::unsqueeze_copy.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::upsample_bilinear2d.vec_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::upsample_nearest2d.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::upsample_nearest2d.vec_out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::view_copy.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
aten::zeros_like.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true
custom::add_3.out:
debug_info:
- functions.yaml
include_all_overloads: false
is_root_operator: true
is_used_for_training: true

@@ -0,0 +1,25 @@
// clang-format off
#pragma once
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>
#include <ATen/TensorUtils.h>
#include <ATen/TracerMode.h>
#include <ATen/core/Generator.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <c10/util/Optional.h>
// ${generated_comment}
${static_dispatch_extra_headers}
namespace torch {
namespace executor {
${Functions_declarations}
} // namespace executor
} // namespace torch

@@ -0,0 +1,31 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}_native.h> \
and see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <c10/util/Optional.h>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <tuple>
#include <vector>
${nativeFunctions_declarations}

@@ -0,0 +1,28 @@
#include <operator_registry.h>
#include <event_tracer_hooks.h>
#include "${fn_header}" // Generated Function import headers
namespace torch {
namespace executor {
using namespace internal;
namespace {
using KernelArrayRef = ::at::ArrayRef<::torch::executor::Kernel>;
static Kernel kernels_to_register[] = {
${unboxed_kernels} // Generated operators
};
// Explicitly convert to ArrayRef, so that the API can take an empty C array of
// Kernels.
static KernelArrayRef kernel_array_ref(
kernels_to_register,
kernels_to_register + sizeof(kernels_to_register) / sizeof(Kernel));
// Return value not used. Keep the static variable assignment to register
// operators at static initialization time.
static auto success_with_kernel_reg = register_kernels(kernel_array_ref);
} // namespace
} // namespace executor
} // namespace torch
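For intuition, the ${unboxed_kernels} placeholder expands to comma-separated Kernel entries whose lambdas unbox arguments from the EValue stack and box the result back. A simplified, hypothetical expansion (the exact expected shape is asserted in test_executorch_gen.py later in this diff):

// Kernel(
//     "custom_1::op_1",
//     [](torch::executor::KernelRuntimeContext& context, EValue** stack) {
//       EXECUTORCH_SCOPE_PROF("native_call_op_1");
//       bool result_ = at::native::kernel_1(context);
//       *stack[0] = EValue(result_);
//     }),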

@@ -0,0 +1,27 @@
// clang-format off
// Generated code for registering custom operators into the dispatcher.
#include <torch/library.h>
#include <ATen/Tensor.h>
$ops_headers
namespace torch {
namespace executor {
namespace function {
${dispatch_anonymous_definitions}
// All out variants ops
${static_init_dispatch_registrations}
namespace ${dispatch_namespace}
{
${dispatch_namespaced_definitions}
} // namespace ${dispatch_namespace}
} // namespace function
} // namespace executor
} // namespace torch

@@ -0,0 +1,22 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
// ${generated_comment}
// Exposing an API for registering all kernels at once.
#include <executorch/runtime/core/evalue.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/kernel/operator_registry.h>
#include <executorch/runtime/platform/profiler.h>
namespace torch {
namespace executor {
Error register_all_kernels();
} // namespace executor
} // namespace torch

@@ -0,0 +1,10 @@
// ${generated_comment}
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <torch/library.h>
namespace at {
TORCH_LIBRARY_FRAGMENT(aten, m) {
${aten_schema_registrations};
}
$schema_registrations
} // namespace at

test/edge/test_main.cpp Normal file
@@ -0,0 +1,18 @@
#include <gtest/gtest.h>
// Appends `flag` to the current gtest filter as a negative pattern; e.g. the
// default filter "*" becomes "*-*_CUDA:*_MultiCUDA".
std::string add_negative_flag(const std::string& flag) {
std::string filter = ::testing::GTEST_FLAG(filter);
if (filter.find('-') == std::string::npos) {
filter.push_back('-');
} else {
filter.push_back(':');
}
filter += flag;
return filter;
}
int main(int argc, char* argv[]) {
::testing::InitGoogleTest(&argc, argv);
::testing::GTEST_FLAG(filter) = add_negative_flag("*_CUDA:*_MultiCUDA");
return RUN_ALL_TESTS();
}

@@ -0,0 +1,53 @@
#include "kernel_runtime_context.h"
#include "operator_registry.h"
#include <gtest/gtest.h>
namespace torch {
namespace executor {
// add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
TEST(OperatorRegistrationTest, Add) {
EValue values[4];
values[0] = EValue(at::ones({2, 3}));
values[1] = EValue(at::ones({2, 3}));
values[2] = EValue(int64_t(1));
values[3] = EValue(at::zeros({2, 3}));
ASSERT_TRUE(hasKernelFn("aten::add.out"));
auto op = getKernelFn("aten::add.out");
EValue* kernel_values[4];
for (size_t i = 0; i < 4; i++) {
kernel_values[i] = &values[i];
}
KernelRuntimeContext context{};
op(context, kernel_values);
at::Tensor expected = at::ones({2, 3});
expected = at::fill(expected, 2);
ASSERT_TRUE(expected.equal(kernel_values[3]->toTensor()));
}
// custom::add_3.out(Tensor a, Tensor b, Tensor c, *, Tensor(a!) out) -> Tensor(a!)
TEST(OperatorRegistrationTest, CustomAdd3) {
EValue values[4];
values[0] = EValue(at::ones({2, 3}));
values[1] = EValue(at::ones({2, 3}));
values[2] = EValue(at::ones({2, 3}));
values[3] = EValue(at::zeros({2, 3}));
ASSERT_TRUE(hasKernelFn("custom::add_3.out"));
auto op = getKernelFn("custom::add_3.out");
EValue* kernel_values[4];
for (size_t i = 0; i < 4; i++) {
kernel_values[i] = &values[i];
}
KernelRuntimeContext context{};
op(context, kernel_values);
at::Tensor expected = at::ones({2, 3});
expected = at::fill(expected, 3);
ASSERT_TRUE(expected.equal(kernel_values[3]->toTensor()));
}
} // namespace executor
} // namespace torch

@@ -287,3 +287,18 @@ def define_tools_targets(
":autograd",
],
)
python_test(
name = "test_torchgen_executorch",
srcs = [
"test/test_executorch_gen.py",
"test/test_executorch_signatures.py",
"test/test_executorch_types.py",
"test/test_executorch_unboxing.py",
],
contacts = contacts,
visibility = ["PUBLIC"],
deps = [
torchgen_deps,
],
)

@@ -0,0 +1,147 @@
from __future__ import annotations
import tempfile
import unittest
from typing import Any
from unittest.mock import ANY, Mock, patch
import expecttest
import torchgen
from torchgen.executorch.api.custom_ops import ComputeNativeFunctionStub
from torchgen.executorch.model import ETKernelIndex
from torchgen.gen_executorch import gen_headers
from torchgen.model import Location, NativeFunction
from torchgen.selective_build.selector import SelectiveBuilder
from torchgen.utils import FileManager
SPACES = " "
def _get_native_function_from_yaml(yaml_obj: dict[str, object]) -> NativeFunction:
native_function, _ = NativeFunction.from_yaml(
yaml_obj,
loc=Location(__file__, 1),
valid_tags=set(),
)
return native_function
class TestComputeNativeFunctionStub(expecttest.TestCase):
"""
Could use torch.testing._internal.common_utils to reduce boilerplate, but the
GH CI job doesn't build torch before running the tools unit tests, hence
these parametrized tests are added manually.
"""
def _test_function_schema_generates_correct_kernel(
self, obj: dict[str, Any], expected: str
) -> None:
func = _get_native_function_from_yaml(obj)
gen = ComputeNativeFunctionStub()
res = gen(func)
self.assertIsNotNone(res)
self.assertExpectedInline(
str(res),
expected,
)
def test_function_schema_generates_correct_kernel_tensor_out(self) -> None:
obj = {"func": "custom::foo.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"}
expected = """
at::Tensor & wrapper_CPU_out_foo_out(const at::Tensor & self, at::Tensor & out) {
return out;
}
"""
self._test_function_schema_generates_correct_kernel(obj, expected)
def test_function_schema_generates_correct_kernel_no_out(self) -> None:
obj = {"func": "custom::foo.Tensor(Tensor self) -> Tensor"}
expected = """
at::Tensor wrapper_CPU_Tensor_foo(const at::Tensor & self) {
return self;
}
"""
self._test_function_schema_generates_correct_kernel(obj, expected)
def test_function_schema_generates_correct_kernel_no_return(self) -> None:
obj = {"func": "custom::foo.out(Tensor self, *, Tensor(a!)[] out) -> ()"}
expected = f"""
void wrapper_CPU_out_foo_out(const at::Tensor & self, at::TensorList out) {{
{SPACES}
}}
"""
self._test_function_schema_generates_correct_kernel(obj, expected)
def test_function_schema_generates_correct_kernel_3_returns(self) -> None:
obj = {
"func": "custom::foo(Tensor self, Tensor[] other) -> (Tensor, Tensor, Tensor)"
}
expected = """
::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_CPU__foo(const at::Tensor & self, at::TensorList other) {
return ::std::tuple<at::Tensor, at::Tensor, at::Tensor>(
at::Tensor(), at::Tensor(), at::Tensor()
);
}
"""
self._test_function_schema_generates_correct_kernel(obj, expected)
def test_function_schema_generates_correct_kernel_1_return_no_out(self) -> None:
obj = {"func": "custom::foo(Tensor[] a) -> Tensor"}
expected = """
at::Tensor wrapper_CPU__foo(at::TensorList a) {
return at::Tensor();
}
"""
self._test_function_schema_generates_correct_kernel(obj, expected)
def test_schema_has_no_return_type_argument_throws(self) -> None:
func = _get_native_function_from_yaml(
{"func": "custom::foo.bool(Tensor self) -> bool"}
)
gen = ComputeNativeFunctionStub()
with self.assertRaisesRegex(Exception, "Can't handle this return type"):
gen(func)
class TestGenCustomOpsHeader(unittest.TestCase):
@patch.object(torchgen.utils.FileManager, "write_with_template")
@patch.object(torchgen.utils.FileManager, "write")
def test_fm_writes_custom_ops_header_when_boolean_is_true(
self, unused: Mock, mock_method: Mock
) -> None:
with tempfile.TemporaryDirectory() as tempdir:
fm = FileManager(tempdir, tempdir, False)
gen_headers(
native_functions=[],
gen_custom_ops_header=True,
custom_ops_native_functions=[],
selector=SelectiveBuilder.get_nop_selector(),
kernel_index=ETKernelIndex(index={}),
cpu_fm=fm,
use_aten_lib=False,
)
mock_method.assert_called_once_with(
"CustomOpsNativeFunctions.h", "NativeFunctions.h", ANY
)
@patch.object(torchgen.utils.FileManager, "write_with_template")
@patch.object(torchgen.utils.FileManager, "write")
def test_fm_does_not_write_custom_ops_header_when_boolean_is_false(
self, unused: Mock, mock_method: Mock
) -> None:
with tempfile.TemporaryDirectory() as tempdir:
fm = FileManager(tempdir, tempdir, False)
gen_headers(
native_functions=[],
gen_custom_ops_header=False,
custom_ops_native_functions=[],
selector=SelectiveBuilder.get_nop_selector(),
kernel_index=ETKernelIndex(index={}),
cpu_fm=fm,
use_aten_lib=False,
)
mock_method.assert_not_called()

@@ -0,0 +1,689 @@
from __future__ import annotations
import os
import tempfile
import unittest
import yaml
from torchgen.executorch.model import ETKernelIndex, ETKernelKey
from torchgen.gen import LineLoader
from torchgen.gen_executorch import (
ComputeCodegenUnboxedKernels,
gen_functions_declarations,
parse_yaml_files,
translate_native_yaml,
)
from torchgen.model import (
BackendIndex,
BackendMetadata,
DispatchKey,
Location,
NativeFunction,
OperatorName,
)
from torchgen.selective_build.selector import SelectiveBuilder
TEST_YAML = """
- func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
ufunc_inner_loop:
Generic: add (AllAndComplex, BFloat16, Half, ComplexHalf)
ScalarOnly: add (Bool)
dispatch:
SparseCPU: add_out_sparse_cpu
SparseCUDA: add_out_sparse_cuda
SparseCsrCPU: add_out_sparse_csr_cpu
SparseCsrCUDA: add_out_sparse_csr_cuda
MkldnnCPU: mkldnn_add_out
MPS: add_out_mps
- func: add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: add.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: add_sparse
SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
MkldnnCPU: mkldnn_add
ZeroTensor: add_zerotensor
NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
tags: core
- func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: mul_out
MPS: mul_out_mps
SparseCPU: mul_out_sparse_cpu
SparseCUDA: mul_out_sparse_cuda
SparseCsrCPU, SparseCsrCUDA: mul_out_sparse_csr
MkldnnCPU: mkldnn_mul_out
- func: mul.Tensor(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: mul.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: mul_sparse
SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr
MkldnnCPU: mkldnn_mul
ZeroTensor: mul_zerotensor
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
tags: core
"""
TEST_KERNEL_YAML = """
- func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
ufunc_inner_loop:
Generic: add (AllAndComplex, BFloat16, Half, ComplexHalf)
ScalarOnly: add (Bool)
type_alias:
T0: [Float, Double]
T1: [Double, Int]
dim_order_alias:
D0: [0, 1, 2, 3]
D1: [0, 3, 2, 1]
kernels:
- arg_meta: null
kernel_name: default_impl
- arg_meta:
self: [T0, D0]
other: [T1, D0]
out: [T0, D0]
kernel_name: test_impl
- arg_meta:
self: [T1, D0]
other: [T1, D1]
out: [T0, D1]
kernel_name: test_impl_2
- func: add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: add.out
variants: function, method
tags: core
- func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
type_alias:
T0: [Float]
T1: [Double]
dim_order_alias:
D0: [0, 1, 2, 3]
kernels:
- arg_meta: null
kernel_name: default_impl
- arg_meta:
self: [T0, D0]
other: [T1, D0]
out: [T0, D0]
kernel_name: test_impl
- func: mul.Tensor(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: mul.out
variants: function, method
tags: core
"""
class TestParseNativeYaml(unittest.TestCase):
def setUp(self) -> None:
self.temp_dir = tempfile.mkdtemp()
self.aten_yaml_path = os.path.join(self.temp_dir, "test_native_functions.yaml")
with open(self.aten_yaml_path, "w") as f:
f.write(TEST_YAML)
self.ops_yaml_path = os.path.join(self.temp_dir, "test.yaml")
self.tags_yaml_path = os.path.join(self.temp_dir, "tags.yaml")
with open(self.tags_yaml_path, "w") as f:
f.write(
"""
- tag: core
desc: test
"""
)
with open(self.ops_yaml_path, "w") as f:
f.write(
"""
- op: add.out
device_check: NoCheck # TensorIterator
dispatch:
CPU: torch::executor::add_out_kernel
- op: mul.out
device_check: NoCheck # TensorIterator
dispatch:
CPU: torch::executor::mul_out_kernel
"""
)
def test_translate_native_yaml_writes_correct_data(self) -> None:
out_yaml_path = os.path.join(self.temp_dir, "out.yaml")
with open(out_yaml_path, "w") as out_file:
translate_native_yaml(
tags_yaml_path=self.tags_yaml_path,
aten_yaml_path=self.aten_yaml_path,
native_yaml_path=self.ops_yaml_path,
use_aten_lib=False,
out_file=out_file,
)
with open(out_yaml_path) as out_file:
es = yaml.load(out_file, Loader=LineLoader)
self.assertTrue(all("func" in e for e in es))
self.assertTrue(all(e.get("variants") == "function" for e in es))
# Check that kernel fields aren't introduced in yaml
for e in es:
self.assertFalse({"kernels", "type_alias", "dim_order_alias"} < e.keys())
def test_parse_yaml_files(self) -> None:
custom_ops_yaml_path = None
selector = SelectiveBuilder.get_nop_selector()
use_aten_lib = False
parsed_yaml, custom_ops_parsed_yaml = parse_yaml_files(
aten_yaml_path=self.aten_yaml_path,
tags_yaml_path=self.tags_yaml_path,
native_yaml_path=self.ops_yaml_path,
custom_ops_yaml_path=custom_ops_yaml_path,
selector=selector,
use_aten_lib=use_aten_lib,
)
# Just the default kernel entry
expected_kernel_entry = {"add.out": 1, "mul.out": 1}
self.assertTrue(len(parsed_yaml.native_functions) == len(expected_kernel_entry))
op_entries = parsed_yaml.kernel_index.index
for op_name, kernel_mapping in op_entries.items():
self.assertTrue(
len(kernel_mapping) == expected_kernel_entry.pop(str(op_name))
)
self.assertTrue(len(expected_kernel_entry) == 0)
def tearDown(self) -> None:
import shutil
try:
shutil.rmtree(self.temp_dir)
except OSError:
pass
class TestParseKernelYamlFiles(unittest.TestCase):
def setUp(self) -> None:
self.temp_dir = tempfile.mkdtemp()
self.aten_kernel_yaml_path = os.path.join(
self.temp_dir, "test_kernel_native_functions.yaml"
)
with open(self.aten_kernel_yaml_path, "w") as f:
f.write(TEST_KERNEL_YAML)
self.ops_yaml_path = os.path.join(self.temp_dir, "test.yaml")
self.tags_yaml_path = os.path.join(self.temp_dir, "tags.yaml")
with open(self.tags_yaml_path, "w") as f:
f.write(
"""
- tag: core
desc: test
"""
)
with open(self.ops_yaml_path, "w") as f:
f.write(
"""
- op: add.out
device_check: NoCheck # TensorIterator
dispatch:
CPU: torch::executor::add_out_kernel
- op: mul.out
device_check: NoCheck # TensorIterator
dispatch:
CPU: torch::executor::mul_out_kernel
"""
)
def test_translate_kernel_native_yaml_writes_correct_data(self) -> None:
out_yaml_path = os.path.join(self.temp_dir, "out2.yaml")
with open(out_yaml_path, "w") as out_file:
translate_native_yaml(
tags_yaml_path=self.tags_yaml_path,
aten_yaml_path=self.aten_kernel_yaml_path,
native_yaml_path=self.ops_yaml_path,
use_aten_lib=False,
out_file=out_file,
)
with open(out_yaml_path) as out_file:
es = yaml.load(out_file, Loader=LineLoader)
self.assertTrue(all("func" in e for e in es))
self.assertTrue(all(e.get("variants") == "function" for e in es))
# Check persistence of kernel fields in yaml
for e in es:
self.assertTrue({"kernels", "type_alias", "dim_order_alias"} < e.keys())
def test_parse_yaml_files(self) -> None:
custom_ops_yaml_path = None
selector = SelectiveBuilder.get_nop_selector()
use_aten_lib = False
parsed_yaml, custom_ops_parsed_yaml = parse_yaml_files(
aten_yaml_path=self.aten_kernel_yaml_path,
tags_yaml_path=self.tags_yaml_path,
native_yaml_path=self.ops_yaml_path,
custom_ops_yaml_path=custom_ops_yaml_path,
selector=selector,
use_aten_lib=use_aten_lib,
)
expected_kernel_entry = {"add.out": 9, "mul.out": 2}
self.assertTrue(len(parsed_yaml.native_functions) == len(expected_kernel_entry))
op_entries = parsed_yaml.kernel_index.index
for op_name, kernel_mapping in op_entries.items():
self.assertTrue(
len(kernel_mapping) == expected_kernel_entry.pop(str(op_name))
)
self.assertTrue(len(expected_kernel_entry) == 0)
def tearDown(self) -> None:
import shutil
try:
shutil.rmtree(self.temp_dir)
except OSError:
pass
class TestGenFunctionsDeclarations(unittest.TestCase):
def setUp(self) -> None:
(
self.custom_1_native_function,
custom_1_backend_index,
) = NativeFunction.from_yaml(
{"func": "custom_1::op_1() -> bool", "dispatch": {"CPU": "kernel_1"}},
loc=Location(__file__, 1),
valid_tags=set(),
)
(
self.custom_2_native_function,
custom_2_backend_index,
) = NativeFunction.from_yaml(
{
"func": "custom_2::op_2() -> bool",
"dispatch": {"CPU": "kernel_2"},
},
loc=Location(__file__, 1),
valid_tags=set(),
)
(
self.custom_3_native_function,
custom_3_backend_index,
) = NativeFunction.from_yaml(
{
"func": "custom_3::op_3(Tensor(a!) self, Tensor x) -> Tensor(a!)",
"dispatch": {"CPU": "kernel_3"},
"variants": "method",
},
loc=Location(__file__, 1),
valid_tags=set(),
)
backend_indices: dict[DispatchKey, dict[OperatorName, BackendMetadata]] = {
DispatchKey.CPU: {},
DispatchKey.QuantizedCPU: {},
}
BackendIndex.grow_index(backend_indices, custom_1_backend_index)
BackendIndex.grow_index(backend_indices, custom_2_backend_index)
self.static_dispatch_idx = [
BackendIndex(
dispatch_key=k,
use_out_as_primary=True,
external=False,
device_guard=False,
index=backend_indices[k],
)
for k in backend_indices
]
self.kernel_index = ETKernelIndex.from_backend_indices(backend_indices)
def test_operators_with_different_namespaces_are_grouped_correctly(self) -> None:
declarations = gen_functions_declarations(
native_functions=[
self.custom_1_native_function,
self.custom_2_native_function,
],
kernel_index=self.kernel_index,
selector=SelectiveBuilder.get_nop_selector(),
use_aten_lib=False,
)
self.assertTrue(
"""
namespace custom_1 {
// custom_1::op_1() -> bool
TORCH_API inline bool op_1(torch::executor::KernelRuntimeContext & context) {
return ::at::native::kernel_1(context);
}
} // namespace custom_1
"""
in declarations
)
self.assertTrue(
"""
namespace custom_2 {
// custom_2::op_2() -> bool
TORCH_API inline bool op_2(torch::executor::KernelRuntimeContext & context) {
return ::at::native::kernel_2(context);
}
} // namespace custom_2
"""
in declarations
)
def test_aten_lib_has_context_arg(self) -> None:
declarations = gen_functions_declarations(
native_functions=[
self.custom_1_native_function,
],
kernel_index=self.kernel_index,
selector=SelectiveBuilder.get_nop_selector(),
use_aten_lib=True,
)
self.assertTrue(
"""
namespace custom_1 {
// custom_1::op_1() -> bool
TORCH_API inline bool op_1(torch::executor::KernelRuntimeContext & context) {
return at::op_1();
}
} // namespace custom_1
"""
in declarations
)
def test_aten_lib_method_variant(self) -> None:
declarations = gen_functions_declarations(
native_functions=[
self.custom_3_native_function,
],
kernel_index=self.kernel_index,
selector=SelectiveBuilder.get_nop_selector(),
use_aten_lib=True,
)
self.assertTrue(
"""
namespace custom_3 {
// custom_3::op_3(Tensor(a!) self, Tensor x) -> Tensor(a!)
TORCH_API inline at::Tensor & op_3(torch::executor::KernelRuntimeContext & context, at::Tensor & self, const at::Tensor & x) {
return self.op_3(x);
}
} // namespace custom_3
"""
in declarations
)
class TestComputeCodegenUnboxedKernels(unittest.TestCase):
def setUp(self) -> None:
(
self.native_function_no_kern,
_,
) = NativeFunction.from_yaml(
{
"func": "custom_1::op_1() -> bool",
"dispatch": {"CPU": "unused_kernel_1"},
},
loc=Location(__file__, 1),
valid_tags=set(),
)
self.default_kernel_key = ETKernelKey(default=True)
self.default_backend_metadata = BackendMetadata(
"default_kernel", False, "at::native"
)
self.default_kernel_entry = (
[self.default_kernel_key],
self.default_backend_metadata,
)
def test_codegen_unboxed_specialized(self) -> None:
specialized_kernel_key = ETKernelKey.gen_from_yaml(
{"self": ("T0", "D0"), "other": ("T0", "D0"), "out": ("T0", "D0")},
{"T0": ["Double"]},
{"D0": [0, 1, 2, 3]},
)
selector = SelectiveBuilder.from_yaml_dict(
{
"include_all_operators": True,
"et_kernel_metadata": {
"custom_1::op_1": ["v1/7;0,1,2,3|7;0,1,2,3|7;0,1,2,3"]
},
}
)
use_aten_lib = False
entry = (
self.native_function_no_kern,
(specialized_kernel_key, self.default_backend_metadata),
)
result = ComputeCodegenUnboxedKernels(
selector, use_aten_lib, add_exception_boundary=False
)(entry)
# Concat used to prevent whitespace stripping
expected_str = (
"""
Kernel(
"custom_1::op_1",
"v1/7;0,1,2,3|7;0,1,2,3|7;0,1,2,3",
[](torch::executor::KernelRuntimeContext & context, EValue** stack) {
"""
+ """
internal::EventTracerProfileOpScope event_tracer_op_scope(context.internal_event_tracer(), "native_call_op_1");
EXECUTORCH_SCOPE_PROF("native_call_op_1");
bool result_ = at::native::default_kernel(context, );
internal::event_tracer_log_evalue(context.internal_event_tracer(), *stack[0]);
*stack[0] = EValue(result_);
}
),
"""
)
self.assertEqual(expected_str, result)
def test_codegen_unboxed_specialized_not_matching(self) -> None:
specialized_kernel_key = ETKernelKey.gen_from_yaml(
{"self": ("T0", "D0"), "other": ("T0", "D0"), "out": ("T0", "D0")},
{"T0": ["Double"]},
{"D0": [0, 1, 2, 3]},
)
selector = SelectiveBuilder.from_yaml_dict(
{
"include_all_operators": True,
"et_kernel_metadata": {
"custom_1::op_1": ["v1/8;0,1,2,3|7;0,1,2,3|7;0,1,2,3"]
},
}
)
use_aten_lib = False
entry = (
self.native_function_no_kern,
(specialized_kernel_key, self.default_backend_metadata),
)
self.assertRaises(
Exception,
ComputeCodegenUnboxedKernels(
selector, use_aten_lib, add_exception_boundary=False
),
entry,
)
def test_codegen_unboxed_specialized_missing_root_op(self) -> None:
specialized_kernel_key = ETKernelKey.gen_from_yaml(
{"self": ("T0", "D0"), "other": ("T0", "D0"), "out": ("T0", "D0")},
{"T0": ["Double"]},
{"D0": [0, 1, 2, 3]},
)
selector = SelectiveBuilder.from_yaml_dict(
{
"et_kernel_metadata": {
"custom_1::op_1": ["v1/7;0,1,2,3|7;0,1,2,3|7;0,1,2,3"]
}
}
)
use_aten_lib = False
entry = (
self.native_function_no_kern,
(specialized_kernel_key, self.default_backend_metadata),
)
for add_exception_boundary in (True, False):
result = ComputeCodegenUnboxedKernels(
selector, use_aten_lib, add_exception_boundary
)(entry)
# Concat used to prevent whitespace stripping
expected_str = """"""
self.assertEqual(expected_str, result)
def test_codegen_unboxed_default(self) -> None:
"""
This test checks that if there is no specialized kernel, the default kernel is used.
"""
selector = SelectiveBuilder.from_yaml_dict(
{
"include_all_operators": True,
"et_kernel_metadata": {
"custom_1::op_1": ["v1/7;0,1,2,3|7;0,1,2,3|7;0,1,2,3"]
},
}
)
use_aten_lib = False
entry = (self.native_function_no_kern, self.default_kernel_entry)
result = ComputeCodegenUnboxedKernels(
selector, use_aten_lib, add_exception_boundary=False
)(entry)
# Concat used to prevent whitespace stripping
expected_str = (
"""
Kernel(
"custom_1::op_1",
[](torch::executor::KernelRuntimeContext & context, EValue** stack) {
"""
+ """
internal::EventTracerProfileOpScope event_tracer_op_scope(context.internal_event_tracer(), "native_call_op_1");
EXECUTORCH_SCOPE_PROF("native_call_op_1");
bool result_ = at::native::default_kernel(context, );
internal::event_tracer_log_evalue(context.internal_event_tracer(), *stack[0]);
*stack[0] = EValue(result_);
}
),
"""
)
self.assertEqual(expected_str, result)
result = ComputeCodegenUnboxedKernels(
selector, use_aten_lib, add_exception_boundary=True
)(entry)
# Concat used to prevent whitespace stripping
expected_str = (
"""
Kernel(
"custom_1::op_1",
[](torch::executor::KernelRuntimeContext & context, EValue** stack) {
"""
+ """
try {
internal::EventTracerProfileOpScope event_tracer_op_scope(context.internal_event_tracer(), "native_call_op_1");
EXECUTORCH_SCOPE_PROF("native_call_op_1");
bool result_ = at::native::default_kernel(context, );
internal::event_tracer_log_evalue(context.internal_event_tracer(), *stack[0]);
*stack[0] = EValue(result_);
} catch (const std::exception& ex) {
ET_LOG(Error, "Kernel threw an exception: %s", ex.what());
context.fail(torch::executor::Error::Internal);
}
}
),
"""
)
self.maxDiff = None
self.assertEqual(expected_str, result)
def test_codegen_unboxed_default_kernel_key_selected(self) -> None:
"""
This test checks that the default kernel is used when there is no specialized kernel and the selector only has the default kernel key.
"""
selector = SelectiveBuilder.from_yaml_dict(
{
"include_all_operators": True,
"et_kernel_metadata": {"custom_1::op_1": ["default"]},
}
)
use_aten_lib = False
entry = (self.native_function_no_kern, self.default_kernel_entry)
result = ComputeCodegenUnboxedKernels(
selector, use_aten_lib, add_exception_boundary=False
)(entry)
# Concat used to prevent whitespace stripping
expected_str = (
"""
Kernel(
"custom_1::op_1",
[](torch::executor::KernelRuntimeContext & context, EValue** stack) {
"""
+ """
internal::EventTracerProfileOpScope event_tracer_op_scope(context.internal_event_tracer(), "native_call_op_1");
EXECUTORCH_SCOPE_PROF("native_call_op_1");
bool result_ = at::native::default_kernel(context, );
internal::event_tracer_log_evalue(context.internal_event_tracer(), *stack[0]);
*stack[0] = EValue(result_);
}
),
"""
)
self.assertEqual(expected_str, result)

View File

@ -0,0 +1,59 @@
import unittest
from torchgen.executorch.api.types import ExecutorchCppSignature
from torchgen.local import parametrize
from torchgen.model import Location, NativeFunction
DEFAULT_NATIVE_FUNCTION, _ = NativeFunction.from_yaml(
{"func": "foo.out(Tensor input, *, Tensor(a!) out) -> Tensor(a!)"},
loc=Location(__file__, 1),
valid_tags=set(),
)
class ExecutorchCppSignatureTest(unittest.TestCase):
def setUp(self) -> None:
self.sig = ExecutorchCppSignature.from_native_function(DEFAULT_NATIVE_FUNCTION)
def test_runtime_signature_contains_runtime_context(self) -> None:
# test if `KernelRuntimeContext` argument exists in `RuntimeSignature`
with parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
):
args = self.sig.arguments(include_context=True)
self.assertEqual(len(args), 3)
self.assertTrue(any(a.name == "context" for a in args))
def test_runtime_signature_does_not_contain_runtime_context(self) -> None:
# test if `KernelRuntimeContext` argument is missing in `RuntimeSignature`
with parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
):
args = self.sig.arguments(include_context=False)
self.assertEqual(len(args), 2)
self.assertFalse(any(a.name == "context" for a in args))
def test_runtime_signature_declaration_correct(self) -> None:
with parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
):
decl = self.sig.decl(include_context=True)
self.assertEqual(
decl,
(
"torch::executor::Tensor & foo_outf("
"torch::executor::KernelRuntimeContext & context, "
"const torch::executor::Tensor & input, "
"torch::executor::Tensor & out)"
),
)
no_context_decl = self.sig.decl(include_context=False)
self.assertEqual(
no_context_decl,
(
"torch::executor::Tensor & foo_outf("
"const torch::executor::Tensor & input, "
"torch::executor::Tensor & out)"
),
)

View File

@ -0,0 +1,114 @@
import unittest
from torchgen import local
from torchgen.api.types import (
BaseCType,
boolT,
ConstRefCType,
CType,
longT,
MutRefCType,
NamedCType,
OptionalCType,
TupleCType,
VectorCType,
voidT,
)
from torchgen.executorch.api.et_cpp import argument_type, return_type, returns_type
from torchgen.executorch.api.types import ArrayRefCType, scalarT, tensorListT, tensorT
from torchgen.model import Argument, FunctionSchema, Return
class ExecutorchCppTest(unittest.TestCase):
"""
Test torchgen.executorch.api.cpp
"""
def _test_argumenttype_type(self, arg_str: str, expected: NamedCType) -> None:
arg = Argument.parse(arg_str)
self.assertEqual(str(argument_type(arg, binds=arg.name)), str(expected))
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def test_argumenttype_type(self) -> None:
data = [
("Tensor self", NamedCType("self", ConstRefCType(BaseCType(tensorT)))),
("Tensor(a!) out", NamedCType("out", MutRefCType(BaseCType(tensorT)))),
(
"Tensor? opt",
NamedCType("opt", ConstRefCType(OptionalCType(BaseCType(tensorT)))),
),
("Scalar scalar", NamedCType("scalar", ConstRefCType(BaseCType(scalarT)))),
(
"Scalar? scalar",
NamedCType("scalar", ConstRefCType(OptionalCType(BaseCType(scalarT)))),
),
("int[] size", NamedCType("size", ArrayRefCType(BaseCType(longT)))),
("int? dim", NamedCType("dim", OptionalCType(BaseCType(longT)))),
("Tensor[] weight", NamedCType("weight", BaseCType(tensorListT))),
(
"Scalar[] spacing",
NamedCType("spacing", ArrayRefCType(ConstRefCType(BaseCType(scalarT)))),
),
(
"Tensor?[] weight",
NamedCType("weight", ArrayRefCType(OptionalCType(BaseCType(tensorT)))),
),
(
"SymInt[]? output_size",
NamedCType(
"output_size", OptionalCType(ArrayRefCType(BaseCType(longT)))
),
),
(
"int[]? dims",
NamedCType("dims", OptionalCType(ArrayRefCType(BaseCType(longT)))),
),
(
"bool[3] output_mask",
NamedCType("output_mask", ArrayRefCType(BaseCType(boolT))),
),
]
for d in data:
self._test_argumenttype_type(*d)
def _test_returntype_type(self, ret_str: str, expected: CType) -> None:
ret = Return.parse(ret_str)
self.assertEqual(str(return_type(ret)), str(expected))
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def test_returntype_type(self) -> None:
data = [
("Tensor", BaseCType(tensorT)),
("Tensor(a!)", MutRefCType(BaseCType(tensorT))),
("Tensor[]", VectorCType(BaseCType(tensorT))),
]
for d in data:
self._test_returntype_type(*d)
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def test_returns_type(self) -> None:
func = FunctionSchema.parse(
"min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)"
)
expected = TupleCType([BaseCType(tensorT), BaseCType(tensorT)])
self.assertEqual(str(returns_type(func.returns)), str(expected))
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def test_void_return_type(self) -> None:
func = FunctionSchema.parse(
"_foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()"
)
expected = BaseCType(voidT)
self.assertEqual(str(returns_type(func.returns)), str(expected))
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,176 @@
import unittest
from types import ModuleType
from torchgen import local
from torchgen.api import cpp as aten_cpp, types as aten_types
from torchgen.api.types import (
ArgName,
BaseCType,
ConstRefCType,
MutRefCType,
NamedCType,
)
from torchgen.executorch.api import et_cpp as et_cpp, types as et_types
from torchgen.executorch.api.unboxing import Unboxing
from torchgen.model import BaseTy, BaseType, ListType, OptionalType, Type
def aten_argumenttype_type_wrapper(
t: Type, *, mutable: bool, binds: ArgName, remove_non_owning_ref_types: bool = False
) -> NamedCType:
return aten_cpp.argumenttype_type(
t,
mutable=mutable,
binds=binds,
remove_non_owning_ref_types=remove_non_owning_ref_types,
)
ATEN_UNBOXING = Unboxing(argument_type_gen=aten_argumenttype_type_wrapper)
ET_UNBOXING = Unboxing(argument_type_gen=et_cpp.argumenttype_type)
class TestUnboxing(unittest.TestCase):
"""
We could use torch.testing._internal.common_utils to reduce boilerplate, but
the GH CI job doesn't build torch before running the tools unit tests, hence
these manually parametrized tests.
"""
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def test_symint_argument_translate_ctype_aten(self) -> None:
# test that a `SymInt[]` JIT argument is translated into the correct C++ argument.
# It should be `IntArrayRef`, because Executorch doesn't use the SymInt signature.
# pyre-fixme[16]: `enum.Enum` has no attribute `SymInt`
# pyre-fixme[19]: Call `BaseType.__init__` expects 0 positional arguments, 1 was provided.
symint_list_type = ListType(elem=BaseType(BaseTy.SymInt), size=None)
out_name, ctype, _, _ = ATEN_UNBOXING.argumenttype_evalue_convert(
t=symint_list_type, arg_name="size", mutable=False
)
self.assertEqual(out_name, "size_list_out")
self.assertIsInstance(ctype, BaseCType)
# pyre-fixme[16]:
self.assertEqual(ctype, aten_types.BaseCType(aten_types.intArrayRefT))
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def test_symint_argument_translate_ctype_executorch(self) -> None:
# test that a `SymInt[]` JIT argument is translated into the correct C++ argument.
# It should be an `ArrayRef` of int64_t, because Executorch doesn't use the SymInt signature.
# pyre-fixme[16]: `enum.Enum` has no attribute `SymInt`
# pyre-fixme[19]: Call `BaseType.__init__` expects 0 positional arguments, 1 was provided.
symint_list_type = ListType(elem=BaseType(BaseTy.SymInt), size=None)
out_name, ctype, _, _ = ET_UNBOXING.argumenttype_evalue_convert(
t=symint_list_type, arg_name="size", mutable=False
)
self.assertEqual(out_name, "size_list_out")
self.assertIsInstance(ctype, et_types.ArrayRefCType)
# pyre-fixme[16]:
self.assertEqual(
ctype, et_types.ArrayRefCType(elem=BaseCType(aten_types.longT))
)
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def _test_const_tensor_argument_translate_ctype(
self, unboxing: Unboxing, types: ModuleType
) -> None:
# pyre-fixme[16]: `enum.Enum` has no attribute `Tensor`
# pyre-fixme[19]: Call `BaseType.__init__` expects 0 positional arguments, 1 was provided.
tensor_type = BaseType(BaseTy.Tensor)
out_name, ctype, _, _ = unboxing.argumenttype_evalue_convert(
t=tensor_type, arg_name="self", mutable=False
)
self.assertEqual(out_name, "self_base")
# pyre-fixme[16]:
self.assertEqual(ctype, ConstRefCType(BaseCType(types.tensorT)))
def test_const_tensor_argument_translate_ctype_aten(self) -> None:
self._test_const_tensor_argument_translate_ctype(ATEN_UNBOXING, aten_types)
def test_const_tensor_argument_translate_ctype_executorch(self) -> None:
self._test_const_tensor_argument_translate_ctype(ET_UNBOXING, et_types)
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def _test_mutable_tensor_argument_translate_ctype(
self, unboxing: Unboxing, types: ModuleType
) -> None:
# pyre-fixme[16]: `enum.Enum` has no attribute `Tensor`
# pyre-fixme[19]: Call `BaseType.__init__` expects 0 positional arguments, 1 was provided.
tensor_type = BaseType(BaseTy.Tensor)
out_name, ctype, _, _ = unboxing.argumenttype_evalue_convert(
t=tensor_type, arg_name="out", mutable=True
)
self.assertEqual(out_name, "out_base")
# pyre-fixme[16]:
self.assertEqual(ctype, MutRefCType(BaseCType(types.tensorT)))
def test_mutable_tensor_argument_translate_ctype_aten(self) -> None:
self._test_mutable_tensor_argument_translate_ctype(ATEN_UNBOXING, aten_types)
def test_mutable_tensor_argument_translate_ctype_executorch(self) -> None:
self._test_mutable_tensor_argument_translate_ctype(ET_UNBOXING, et_types)
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def _test_tensor_list_argument_translate_ctype(
self, unboxing: Unboxing, types: ModuleType
) -> None:
# pyre-fixme[16]: `enum.Enum` has no attribute `Tensor`
# pyre-fixme[19]: Call `BaseType.__init__` expects 0 positional arguments, 1 was provided.
tensor_list_type = ListType(elem=BaseType(BaseTy.Tensor), size=None)
out_name, ctype, _, _ = unboxing.argumenttype_evalue_convert(
t=tensor_list_type, arg_name="out", mutable=True
)
self.assertEqual(out_name, "out_list_out")
# pyre-fixme[16]:
self.assertEqual(ctype, BaseCType(types.tensorListT))
def test_tensor_list_argument_translate_ctype_aten(self) -> None:
self._test_tensor_list_argument_translate_ctype(ATEN_UNBOXING, aten_types)
def test_tensor_list_argument_translate_ctype_executorch(self) -> None:
self._test_tensor_list_argument_translate_ctype(ET_UNBOXING, et_types)
@local.parametrize(
use_const_ref_for_mutable_tensors=False, use_ilistref_for_tensor_lists=False
)
def _test_optional_int_argument_translate_ctype(
self, unboxing: Unboxing, types: ModuleType
) -> None:
# pyre-fixme[16]: `enum.Enum` has no attribute `Tensor`
# pyre-fixme[19]: Call `BaseType.__init__` expects 0 positional arguments, 1 was provided.
optional_int_type = OptionalType(elem=BaseType(BaseTy.int))
out_name, ctype, _, _ = unboxing.argumenttype_evalue_convert(
t=optional_int_type, arg_name="something", mutable=True
)
self.assertEqual(out_name, "something_opt_out")
# pyre-fixme[16]:
self.assertEqual(ctype, types.OptionalCType(BaseCType(types.longT)))
def test_optional_int_argument_translate_ctype_aten(self) -> None:
self._test_optional_int_argument_translate_ctype(ATEN_UNBOXING, aten_types)
def test_optional_int_argument_translate_ctype_executorch(self) -> None:
self._test_optional_int_argument_translate_ctype(ET_UNBOXING, et_types)

View File

@ -298,3 +298,45 @@ operators:
valid_tags=set(),
)
self.assertTrue(selector.is_native_function_selected(native_function))
class TestExecuTorchSelectiveBuild(unittest.TestCase):
def test_et_kernel_selected(self) -> None:
yaml_config = """
et_kernel_metadata:
aten::add.out:
- "v1/6;0,1|6;0,1|6;0,1|6;0,1"
aten::sub.out:
- "v1/6;0,1|6;0,1|6;0,1|6;0,1"
"""
selector = SelectiveBuilder.from_yaml_str(yaml_config)
self.assertListEqual(
["v1/6;0,1|6;0,1|6;0,1|6;0,1"],
selector.et_get_selected_kernels(
"aten::add.out",
[
"v1/6;0,1|6;0,1|6;0,1|6;0,1",
"v1/3;0,1|3;0,1|3;0,1|3;0,1",
"v1/6;1,0|6;0,1|6;0,1|6;0,1",
],
),
)
self.assertListEqual(
["v1/6;0,1|6;0,1|6;0,1|6;0,1"],
selector.et_get_selected_kernels(
"aten::sub.out", ["v1/6;0,1|6;0,1|6;0,1|6;0,1"]
),
)
self.assertListEqual(
[],
selector.et_get_selected_kernels(
"aten::mul.out", ["v1/6;0,1|6;0,1|6;0,1|6;0,1"]
),
)
# We don't use version for now.
self.assertListEqual(
["v2/6;0,1|6;0,1|6;0,1|6;0,1"],
selector.et_get_selected_kernels(
"aten::add.out", ["v2/6;0,1|6;0,1|6;0,1|6;0,1"]
),
)
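# Note on the kernel key strings above: per ETKernelKey.to_native_string in
# torchgen/executorch/model.py (added later in this diff), a key has the form
# "v<version>/<dtype>;<dim_order>|..." with one "<dtype>;<dim_order>" entry per
# tensor argument. In "v1/6;0,1|6;0,1|6;0,1|6;0,1", 6 is ScalarType.Float and
# "0,1" is the dimension order, i.e. four Float tensors in contiguous dim order.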

View File

@ -18,3 +18,13 @@ def define_targets(rules):
rules.requirement("typing-extensions"),
],
)
rules.py_binary(
name = "gen_executorch",
srcs = [":torchgen"],
visibility = ["//visibility:public"],
deps = [
rules.requirement("PyYAML"),
rules.requirement("typing-extensions"),
],
)

View File

View File

View File

@ -0,0 +1,151 @@
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass
from typing import TYPE_CHECKING
from torchgen import dest
# disable import sorting to avoid circular dependency.
from torchgen.api.types import DispatcherSignature # usort: skip
from torchgen.context import method_with_native_function
from torchgen.model import BaseTy, BaseType, DispatchKey, NativeFunction, Variant
from torchgen.utils import concatMap, Target
if TYPE_CHECKING:
from collections.abc import Sequence
from torchgen.executorch.model import ETKernelIndex
from torchgen.selective_build.selector import SelectiveBuilder
# Generates RegisterKernelStub.cpp, which provides placeholder kernels for custom operators. This will be used on
# the model authoring side.
@dataclass(frozen=True)
class ComputeNativeFunctionStub:
@method_with_native_function
def __call__(self, f: NativeFunction) -> str | None:
if Variant.function not in f.variants:
return None
sig = DispatcherSignature.from_schema(
f.func, prefix=f"wrapper_CPU_{f.func.name.overload_name}_", symint=False
)
assert sig is not None
if len(f.func.returns) == 0:
ret_name = ""
elif len(f.func.returns) == 1:
if f.func.arguments.out:
ret_name = f.func.arguments.out[0].name
else:
ret_name = next(
(
a.name
for a in f.func.arguments.flat_non_out
if a.type == f.func.returns[0].type
),
"",
)
if not ret_name:
# if return type is tensor
if f.func.returns[0].type == BaseType(BaseTy.Tensor):
# Returns an empty tensor
ret_name = "at::Tensor()"
else:
raise Exception( # noqa: TRY002
f"Can't handle this return type {f.func}"
)
elif len(f.func.arguments.out) == len(f.func.returns):
# Returns a tuple of out arguments
tensor_type = "at::Tensor &"
comma = ", "
ret_name = f"""::std::tuple<{comma.join([tensor_type] * len(f.func.returns))}>(
{comma.join([r.name for r in f.func.arguments.out])}
)"""
else:
assert all(a.type == BaseType(BaseTy.Tensor) for a in f.func.returns), (
f"Only support tensor returns but got {f.func.returns}"
)
# Returns a tuple of empty tensors
tensor_type = "at::Tensor"
comma = ", "
ret_name = f"""::std::tuple<{comma.join([tensor_type] * len(f.func.returns))}>(
{comma.join(["at::Tensor()" for _ in f.func.returns])}
)"""
ret_str = f"return {ret_name};" if len(f.func.returns) > 0 else ""
return f"""
{sig.defn()} {{
{ret_str}
}}
"""
def gen_custom_ops_registration(
*,
native_functions: Sequence[NativeFunction],
selector: SelectiveBuilder,
kernel_index: ETKernelIndex,
rocm: bool,
) -> tuple[str, str]:
"""
Generate custom ops registration code for dest.RegisterDispatchKey.
:param native_functions: a sequence of `NativeFunction`
:param selector: for selective build.
:param kernel_index: kernels for all the ops.
:param rocm: bool for dest.RegisterDispatchKey.
:return: generated C++ code to register custom operators into PyTorch
"""
# convert kernel index to BackendIndex. This is because we can't handle ETKernelIndex yet.
# TODO larryliu: evaluate if this code is still needed. If yes let it handle ETKernelIndex.
dispatch_key = DispatchKey.CPU
backend_index = kernel_index._to_backend_index()
static_init_dispatch_registrations = ""
ns_grouped_native_functions: dict[str, list[NativeFunction]] = defaultdict(list)
for native_function in native_functions:
ns_grouped_native_functions[native_function.namespace].append(native_function)
for namespace, functions in ns_grouped_native_functions.items():
if len(functions) == 0:
continue
dispatch_registrations_body = "\n".join(
list(
concatMap(
dest.RegisterDispatchKey(
backend_index,
Target.REGISTRATION,
selector,
rocm=rocm,
symint=False,
class_method_name=None,
skip_dispatcher_op_registration=False,
),
functions,
)
)
)
static_init_dispatch_registrations += f"""
TORCH_LIBRARY_IMPL({namespace}, {dispatch_key}, m) {{
{dispatch_registrations_body}
}}"""
anonymous_definition = "\n".join(
list(
concatMap(
dest.RegisterDispatchKey(
backend_index,
Target.ANONYMOUS_DEFINITION,
selector,
rocm=rocm,
symint=False,
class_method_name=None,
skip_dispatcher_op_registration=False,
),
native_functions,
)
)
)
return anonymous_definition, static_init_dispatch_registrations
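# Shape of the generated registration code (a sketch; the m.impl lines are produced
# by dest.RegisterDispatchKey at Target.REGISTRATION, and the exact wrapper names
# vary): for a namespace "custom_1" this yields something like
#
# TORCH_LIBRARY_IMPL(custom_1, CPU, m) {
# m.impl("op_1", TORCH_FN(wrapper_CPU__op_1));
# }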

View File

@ -0,0 +1,367 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from typing_extensions import assert_never
from torchgen import local
from torchgen.api.types import (
ArgName,
BaseCType,
Binding,
ConstRefCType,
CType,
MutRefCType,
NamedCType,
SpecialArgName,
TupleCType,
VectorCType,
voidT,
)
from torchgen.executorch.api.types import (
ArrayRefCType,
BaseTypeToCppMapping,
OptionalCType,
scalarT,
tensorListT,
tensorT,
)
from torchgen.model import (
Argument,
Arguments,
BaseTy,
BaseType,
ListType,
NativeFunction,
OptionalType,
Return,
SelfArgument,
TensorOptionsArguments,
Type,
)
if TYPE_CHECKING:
from collections.abc import Sequence
"""
This file describes the translation of JIT schema to the public C++ API, which is what people use when they call
functions like at::add. It also serves as a native function API, which is the signature of kernels,
since in Executorch CppSignature is the same as NativeSignature.
Differences between this file and torchgen.api.cpp:
- Executorch doesn't support TensorOptions; however, we keep that logic here to stay compatible with
torchgen.api.cpp, so that we can support features like ATen mode (running ATen kernels in Executorch).
- Executorch doesn't support Dimname.
- Executorch runtime doesn't support SymInt; it is treated as int.
"""
# Translation of "value types" in JIT schema to C++ API type. Value
# types look the same no matter if they are argument types or return
# types. Returns None if the type in question is not a value type.
def valuetype_type(
t: Type,
*,
binds: ArgName,
) -> NamedCType | None:
if isinstance(t, BaseType):
if t.name == BaseTy.Tensor or t.name == BaseTy.Scalar:
return None
# For SymInt we simply treat it as int.
elif str(t) == "SymInt":
return NamedCType(binds, BaseCType(BaseTypeToCppMapping[BaseTy.int]))
# All other BaseType currently map directly to BaseCppTypes.
return NamedCType(binds, BaseCType(BaseTypeToCppMapping[t.name]))
elif isinstance(t, OptionalType):
elem = valuetype_type(t.elem, binds=binds)
if elem is None:
return None
return NamedCType(binds, OptionalCType(elem.type))
elif isinstance(t, ListType):
if str(t.elem) == "bool":
assert t.size is not None
return NamedCType(
binds, ArrayRefCType(BaseCType(BaseTypeToCppMapping[BaseTy.bool]))
)
else:
return None
else:
raise AssertionError(f"unrecognized type {repr(t)}")
# Translation of types occurring in JIT arguments to a C++ argument type.
# If remove_non_owning_ref_types is set, we'll guarantee that the output CType is not a non-owning reference type.
# For example, we'll return std::vector<int> instead of IntArrayRef.
# See Note [translation from C++ reference to value types]
def argumenttype_type(
t: Type,
*,
mutable: bool,
binds: ArgName,
remove_non_owning_ref_types: bool = False,
) -> NamedCType:
# If it's a value type, do the value type translation
r = valuetype_type(
t,
binds=binds,
)
if r is not None:
return r
if isinstance(t, BaseType):
if t.name == BaseTy.Tensor:
if mutable and not local.use_const_ref_for_mutable_tensors():
return NamedCType(binds, MutRefCType(BaseCType(tensorT)))
else:
return NamedCType(binds, ConstRefCType(BaseCType(tensorT)))
elif t.name == BaseTy.Scalar:
return NamedCType(binds, ConstRefCType(BaseCType(scalarT)))
else:
raise AssertionError(f"base type should have been value type {t}")
elif isinstance(t, OptionalType):
if str(t.elem) == "Tensor":
if mutable and not local.use_const_ref_for_mutable_tensors():
return NamedCType(
binds, MutRefCType(BaseCType(tensorT))
) # TODO: fix this discrepancy
else:
return NamedCType(
binds, ConstRefCType(OptionalCType(BaseCType(tensorT)))
)
elif str(t.elem) == "Scalar":
return NamedCType(binds, ConstRefCType(OptionalCType(BaseCType(scalarT))))
elem = argumenttype_type(t.elem, mutable=mutable, binds=binds)
return NamedCType(binds, OptionalCType(elem.type))
elif isinstance(t, ListType):
# TODO: keeping these special cases for Tensor[] and Tensor?[] so that we can hookup with ATen kernels.
if str(t.elem) == "Tensor":
return NamedCType(binds, BaseCType(tensorListT))
elif str(t.elem) == "Dimname":
raise NotImplementedError("Executorch doesn't support Dimname")
elif str(t.elem) == "Tensor?":
return NamedCType(binds, ArrayRefCType(OptionalCType(BaseCType(tensorT))))
elem = argumenttype_type(t.elem, mutable=mutable, binds=binds)
return NamedCType(binds, ArrayRefCType(elem.type))
else:
raise AssertionError(f"unrecognized type {repr(t)}")
# Translate a JIT argument into its C++ type
def argument_type(a: Argument, *, binds: ArgName) -> NamedCType:
return argumenttype_type(a.type, mutable=a.is_write, binds=binds)
# Translation of a (non-multi) return type from JIT to C++
# N.B: returntype_type returns a CType, not a NamedCType.
# This is mostly because of the mismatch between return types and return names.
# e.g. a function with a return type of 'void' has 0 return names,
# and a function with a return type of 'std::tuple' has >1 return name.
def returntype_type(t: Type, *, mutable: bool) -> CType:
# placeholder is ignored
r = valuetype_type(t, binds="__placeholder__")
if r is not None:
return r.type
if isinstance(t, BaseType):
if t.name == BaseTy.Tensor:
if mutable:
if local.use_const_ref_for_mutable_tensors():
return ConstRefCType(BaseCType(tensorT))
else:
return MutRefCType(BaseCType(tensorT))
else:
# Note [Tensor Copy Returns]
# Currently, we use "Argument.is_write" to determine
# whether or not Tensor return types should be copies or references.
# If that ever changes, take a look at other locations of this note!
return BaseCType(tensorT)
elif t.name == BaseTy.Scalar:
return BaseCType(scalarT)
elif isinstance(t, ListType):
assert not mutable, (
"Native functions should never return a mutable tensor list. They should return void."
)
elem = returntype_type(t.elem, mutable=False)
assert t.size is None, f"fixed size list returns not supported: {t}"
return VectorCType(elem)
raise AssertionError(f"unrecognized return type {t}")
# Translation of a single return to its C++ type
def return_type(r: Return) -> CType:
return returntype_type(r.type, mutable=r.is_write)
# Translation of a full (possibly multi) return from JIT to its C++ type
def returns_type(rs: Sequence[Return]) -> CType:
if len(rs) == 0:
return BaseCType(voidT)
elif len(rs) == 1:
return return_type(rs[0])
else:
return TupleCType([return_type(r) for r in rs])
def return_names(f: NativeFunction, *, fallback_name: str = "result") -> Sequence[str]:
returns: list[str] = []
for i, r in enumerate(f.func.returns):
# If we have an inplace function, the return argument is
# implicitly named self.
# TODO: Consider incorporating this into the data model
if f.func.name.name.inplace:
assert i == 0, "illegal inplace function with multiple returns"
name = "self"
# If we are out function, the name is the name of the
# corresponding output function (r.name will get recorded
# in field_name later.)
elif f.func.is_out_fn():
name = f.func.arguments.out[i].name
# If the return argument is explicitly named...
elif r.name:
name_conflict = any(
r.name == a.name for a in f.func.schema_order_arguments()
)
if name_conflict and not f.func.is_out_fn():
name = f"{r.name}_return"
else:
name = r.name
# If there is no explicit name and no fallback name was passed in, we just name the output result,
# unless it's a multi-return, in which case it's result0,
# result1, etc (zero-indexed)
else:
name = fallback_name if len(f.func.returns) == 1 else f"{fallback_name}{i}"
returns.append(name)
return returns
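# Illustration of the rules above: for an out variant such as
# "foo.out(Tensor input, *, Tensor(a!) out) -> Tensor(a!)", return_names yields
# ["out"] (the name of the out argument); for
# "min.dim(...) -> (Tensor values, Tensor indices)" it yields the explicit
# return names ["values", "indices"].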
JIT_TO_CPP_DEFAULT = {
"False": "false",
"True": "true",
"None": "torch::execustd::nullopt", # UGH this one is type directed
"[]": "{}",
"contiguous_format": "torch::executorch::MemoryFormat::Contiguous",
"long": "torch::executorch::kLong",
}
# Convert a JIT default into C++ expression representing the default
def default_expr(d: str, t: Type) -> str:
if d == "None" and str(t) == "Tensor?":
return "{}"
if isinstance(t, BaseType) and t.name is BaseTy.str:
# Schema allows single quotes but C++ needs double
if len(d) >= 2 and d[0] == "'" and d[-1] == "'":
s = ""
i = 1
while i + 1 < len(d):
if d[i] != "\\":
if d[i] == '"':
s += '\\"'
else:
s += d[i]
i += 1
else:
if d[i + 1] == "'":
s += "'"
else:
s += d[i : i + 2]
i += 2
return f'"{s}"'
if isinstance(t, OptionalType):
if d == "None":
return "torch::executor::nullopt"
return default_expr(d, t.elem)
if isinstance(t, ListType):
if d.startswith("[") and d.endswith("]"):
return "{" + d[1:-1] + "}"
elif t.size is None:
# NOTE: Sized lists can have scalar defaults
raise ValueError(f"Expected a list default '[...]' but found: '{d}'")
return JIT_TO_CPP_DEFAULT.get(d, d)
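# A few concrete translations, following the rules above:
# default_expr("True", bool) -> "true"
# default_expr("None", Tensor?) -> "{}"
# default_expr("None", int?) -> "torch::executor::nullopt"
# default_expr("[0, 1]", int[]) -> "{0, 1}"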
# Convert an argument into its C++ API form
def argument(
a: Argument | TensorOptionsArguments | SelfArgument,
*,
cpp_no_default_args: set[str],
method: bool,
faithful: bool,
has_tensor_options: bool,
) -> list[Binding]:
def sub_argument(
a: Argument | TensorOptionsArguments | SelfArgument,
) -> list[Binding]:
return argument(
a,
cpp_no_default_args=cpp_no_default_args,
method=method,
faithful=faithful,
has_tensor_options=has_tensor_options,
)
if isinstance(a, Argument):
binds: ArgName
if a.name == "memory_format" and has_tensor_options:
binds = SpecialArgName.possibly_redundant_memory_format
else:
binds = a.name
default: str | None = None
if a.name not in cpp_no_default_args and a.default is not None:
default = default_expr(a.default, a.type)
return [
Binding(
nctype=argument_type(a, binds=binds),
name=a.name,
default=default,
argument=a,
)
]
elif isinstance(a, TensorOptionsArguments):
raise NotImplementedError("Need to implement type resolution for TensorOptions")
elif isinstance(a, SelfArgument):
if method:
# Caller is responsible for installing implicit this in context!
return []
else:
return sub_argument(a.argument)
else:
assert_never(a)
def arguments(
arguments: Arguments,
*,
faithful: bool,
method: bool,
cpp_no_default_args: set[str],
) -> list[Binding]:
args: list[Argument | TensorOptionsArguments | SelfArgument] = []
if faithful:
args.extend(arguments.non_out)
args.extend(arguments.out)
else:
args.extend(arguments.out)
args.extend(arguments.non_out)
return [
r.no_default() if faithful else r
for a in args
for r in argument(
a,
faithful=faithful,
method=method,
has_tensor_options=arguments.tensor_options is not None,
cpp_no_default_args=cpp_no_default_args,
)
]

View File

@ -0,0 +1,4 @@
from torchgen.executorch.api.types.types import *
from torchgen.executorch.api.types.signatures import * # usort: skip

View File

@ -0,0 +1,76 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
import torchgen.api.cpp as aten_cpp
from torchgen.executorch.api.types.types import contextArg
if TYPE_CHECKING:
from torchgen.api.types import Binding, CType
from torchgen.model import FunctionSchema, NativeFunction
@dataclass(frozen=True)
class ExecutorchCppSignature:
"""
This signature is merely a CppSignature with Executorch types (optionally
contains KernelRuntimeContext as well). The inline definition of
CppSignature is generated in Functions.h and it's used by unboxing
functions.
"""
# The schema this signature is derived from
func: FunctionSchema
# The set of C++ arguments which should not have defaults applied to them
cpp_no_default_args: set[str]
# Allows you to prepend an arbitrary prefix to the signature name.
# This is useful for parts of the codegen that generate wrappers around kernels,
# and need to avoid naming collisions.
prefix: str = ""
def arguments(self, *, include_context: bool = True) -> list[Binding]:
return ([contextArg] if include_context else []) + et_cpp.arguments(
self.func.arguments,
faithful=True, # always faithful, out argument at the end
method=False, # method not supported
cpp_no_default_args=self.cpp_no_default_args,
)
def name(self) -> str:
return self.prefix + aten_cpp.name(
self.func,
faithful_name_for_out_overloads=True,
)
def decl(self, name: str | None = None, *, include_context: bool = True) -> str:
args_str = ", ".join(
a.decl() for a in self.arguments(include_context=include_context)
)
if name is None:
name = self.name()
return f"{self.returns_type().cpp_type()} {name}({args_str})"
def defn(self, name: str | None = None) -> str:
args = [a.defn() for a in self.arguments()]
args_str = ", ".join(args)
if name is None:
name = self.name()
return f"{self.returns_type().cpp_type()} {name}({args_str})"
def returns_type(self) -> CType:
return et_cpp.returns_type(self.func.returns)
@staticmethod
def from_native_function(
f: NativeFunction, *, prefix: str = ""
) -> ExecutorchCppSignature:
return ExecutorchCppSignature(
func=f.func, prefix=prefix, cpp_no_default_args=f.cpp_no_default_args
)
# This import is deliberately placed at the bottom of the file to avoid a circular
# dependency: et_cpp imports torchgen.executorch.api.types, whose __init__ imports
# this module.
from torchgen.executorch.api import et_cpp
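# Usage sketch, matching the signature unit test earlier in this diff: for
# "foo.out(Tensor input, *, Tensor(a!) out) -> Tensor(a!)",
# ExecutorchCppSignature.from_native_function(f).decl() renders
# "torch::executor::Tensor & foo_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & input, torch::executor::Tensor & out)".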

View File

@ -0,0 +1,77 @@
from __future__ import annotations
from dataclasses import dataclass
from torchgen.api.types import (
BaseCppType,
BaseCType,
Binding,
boolT,
CType,
doubleT,
Expr,
longT,
MutRefCType,
NamedCType,
)
from torchgen.model import BaseTy
halfT = BaseCppType("torch::executor", "Half")
bfloat16T = BaseCppType("torch::executor", "BFloat16")
stringT = BaseCppType("torch::executor", "string_view")
scalarTypeT = BaseCppType("torch::executor", "ScalarType")
tensorT = BaseCppType("torch::executor", "Tensor")
tensorListT = BaseCppType("torch::executor", "TensorList")
scalarT = BaseCppType("torch::executor", "Scalar")
memoryFormatT = BaseCppType("torch::executor", "MemoryFormat")
intArrayRefT = BaseCppType("torch::executor", "IntArrayRef")
optionalT = BaseCppType("torch::executor", "optional")
contextT = BaseCppType("torch::executor", "KernelRuntimeContext")
contextExpr = Expr(
expr="context",
type=NamedCType(name="context", type=MutRefCType(BaseCType(contextT))),
)
contextArg = Binding(
name="context",
nctype=contextExpr.type,
argument=None, # type: ignore[arg-type]
default=None,
)
BaseTypeToCppMapping: dict[BaseTy, BaseCppType] = {
BaseTy.int: longT,
BaseTy.float: doubleT,
BaseTy.bool: boolT,
BaseTy.str: stringT,
BaseTy.ScalarType: scalarTypeT,
BaseTy.Tensor: tensorT,
BaseTy.Scalar: scalarT,
BaseTy.MemoryFormat: memoryFormatT,
}
@dataclass(frozen=True)
class OptionalCType(CType):
elem: CType
def cpp_type(self, *, strip_ref: bool = False) -> str:
# Do not pass `strip_ref` recursively.
return f"torch::executor::optional<{self.elem.cpp_type()}>"
def remove_const_ref(self) -> CType:
return OptionalCType(self.elem.remove_const_ref())
@dataclass(frozen=True)
class ArrayRefCType(CType):
elem: CType
def cpp_type(self, *, strip_ref: bool = False) -> str:
# Do not pass `strip_ref` recursively.
return f"torch::executor::ArrayRef<{self.elem.cpp_type()}>"
def remove_const_ref(self) -> CType:
return ArrayRefCType(self.elem.remove_const_ref())
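# Examples of the rendered C++ types (assuming longT renders as int64_t, as in
# torchgen.api.types): OptionalCType(BaseCType(tensorT)).cpp_type() gives
# "torch::executor::optional<torch::executor::Tensor>", and
# ArrayRefCType(BaseCType(longT)).cpp_type() gives "torch::executor::ArrayRef<int64_t>".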

View File

@ -0,0 +1,218 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Callable, TYPE_CHECKING
from torchgen.model import (
Argument,
BaseTy,
BaseType,
ListType,
NativeFunction,
OptionalType,
Type,
)
if TYPE_CHECKING:
from collections.abc import Sequence
from torchgen.api.types import Binding, CType, NamedCType
connector = "\n\t"
# Return unboxing function name for a NativeFunction
def name(f: NativeFunction) -> str:
return f.func.name.unambiguous_name()
@dataclass(frozen=True)
class Unboxing:
"""
Takes a sequence of Bindings, unboxes EValues into them, and returns generated code that performs the unboxing.
A sample generated code:
// aten::mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
void mul_out(EValue** stack) {
EValue& self = *stack[0];
EValue& other = *stack[1];
EValue& out = *stack[2];
const torch::executor::Tensor & self_base = self.to<torch::executor::Tensor>();
const torch::executor::Tensor & other_base = other.to<torch::executor::Tensor>();
torch::executor::Tensor & out_base = out.to<torch::executor::Tensor>();
EXECUTORCH_SCOPE_PROF("native_call_mul.out");
torch::executor::mul_outf(self_base, other_base, out_base);
}
"""
# this is a callable that converts a JIT argument, into its C++ type.
# Translates (type, mutability, binds) to NamedCType. E.g., torchgen.api.cpp.argumenttype_type.
argument_type_gen: Callable[
...,
NamedCType,
]
# Convert all the arguments in a NativeFunction to C++ code
def convert_arguments(
self, args: Sequence[Binding]
) -> tuple[list[Binding], list[str]]:
code_list = [f"EValue& {args[i].name} = *stack[{i}];" for i in range(len(args))]
binding_list = []
for arg in args:
# expecting only Argument
if not isinstance(arg.argument, Argument):
raise Exception( # noqa: TRY002
f"Unexpected argument type, expecting `Argument` but got {arg}"
)
argument: Argument = arg.argument
unboxed_name, _, code, decl = self.argumenttype_evalue_convert(
argument.type, argument.name, mutable=argument.is_write
)
code_list.extend(decl)
code_list.extend(code)
binding_list.append(arg.with_name(unboxed_name))
return binding_list, code_list
def argumenttype_evalue_convert(
self, t: Type, arg_name: str, *, mutable: bool = False
) -> tuple[str, CType, list[str], list[str]]:
"""
Takes in the type, name and mutability corresponding to an argument, and generates a tuple of:
(1) the name of the newly created unboxed variable
(2) its CType
(3) the C++ code necessary to unbox the argument
(4) any declarations that must be hoisted out of the enclosing scope
:param t: a `Type` of an argument
:param arg_name: argument name
:param mutable: boolean for whether this argument type is mutable
:return: the unboxed result as (out_name, ctype, code, decl)
"""
ctype = self.argument_type_gen(t, mutable=mutable, binds=arg_name).type
if isinstance(t, BaseType):
out_name = f"{arg_name}_base"
code, decl = self._gen_code_base_type(
arg_name=arg_name, out_name=out_name, ctype=ctype
)
elif isinstance(t, OptionalType):
out_name = f"{arg_name}_opt_out"
code, decl = self._gen_code_optional_type(
arg_name=arg_name, out_name=out_name, t=t, ctype=ctype
)
elif isinstance(t, ListType):
out_name = f"{arg_name}_list_out"
code, decl = self._gen_code_list_type(
arg_name=arg_name, out_name=out_name, t=t, ctype=ctype
)
else:
raise Exception( # noqa: TRY002
f"Cannot handle type {t}. arg_name: {arg_name}"
)
return out_name, ctype, code, decl
def _gen_code_base_type(
self, arg_name: str, out_name: str, ctype: CType
) -> tuple[list[str], list[str]]:
return [
f"{ctype.cpp_type()} {out_name} = {arg_name}.to<{ctype.cpp_type(strip_ref=True)}>();"
], []
def _gen_code_optional_type(
self, arg_name: str, out_name: str, t: OptionalType, ctype: CType
) -> tuple[list[str], list[str]]:
in_name = f"{arg_name}_opt_in"
res_name, base_type, res_code, decl = self.argumenttype_evalue_convert(
t.elem, in_name
)
return (
f"""
auto {out_name} = {arg_name}.toOptional<{base_type.cpp_type(strip_ref=True)}>();
""".split("\n"),
decl,
)
def _gen_code_list_type(
self, arg_name: str, out_name: str, t: ListType, ctype: CType
) -> tuple[list[str], list[str]]:
in_name = f"{arg_name}_list_in"
elem_name = f"{arg_name}_elem"
code = []
res_name, res_ctype, res_code, decl = self.argumenttype_evalue_convert(
t.elem, elem_name
)
if isinstance(t.elem, BaseType) and t.elem.name == BaseTy.Tensor:
code.extend(
f"""
auto {out_name} = {arg_name}.toTensorList();
""".split("\n")
)
elif isinstance(t.elem, BaseType) and (
t.elem.name == BaseTy.int or t.elem.name == BaseTy.SymInt
):
code.extend(
f"""
auto {out_name} = {arg_name}.toIntList();
""".split("\n")
)
elif isinstance(t.elem, BaseType) and t.elem.name == BaseTy.float:
code.extend(
f"""
auto {out_name} = {arg_name}.toDoubleList();
""".split("\n")
)
elif isinstance(t.elem, BaseType) and t.elem.name == BaseTy.bool:
# handle list type with size, e.g., bool[4]
code.extend(
f"""
#ifdef USE_ATEN_LIB
std::array<bool, {t.size}> {out_name};
auto {in_name} = {arg_name}.toBoolList();
size_t _i = 0;
for (auto {elem_name}: {in_name}) {{
{out_name}[_i++] = {elem_name};
}}
#else
auto {out_name} = {arg_name}.toBoolList();
#endif
""".split("\n")
)
# pytorch codegen:
# we have to use c10::List for optional element. e.g., Tensor?[] -> c10::List<::std::optional<at::Tensor>>
elif (
isinstance(t.elem, OptionalType)
and isinstance(t.elem.elem, BaseType)
and t.elem.elem.name == BaseTy.Tensor
):
code.extend(
f"""
#ifdef USE_ATEN_LIB
auto {in_name} = {arg_name}.toListOptionalTensor();
c10::List<::std::optional<at::Tensor>> {out_name};
for (auto {elem_name}: {in_name}) {{
{out_name}.push_back({elem_name});
}}
#else
auto {out_name} = {arg_name}.toListOptionalTensor();
#endif
""".split("\n")
)
else:
# use ArrayRef as default.
vec_name = arg_name + "_vec"
# need to bring vector instantiation out of scope so that ArrayRef has valid data
decl.append(
f"std::vector<{res_ctype.cpp_type(strip_ref=True)}> {vec_name};"
)
code.extend(
f"""
for (EValue {elem_name}: {in_name}) {{
{connector.join(res_code)}
{vec_name}.push_back({res_name});
}}
{ctype.cpp_type(strip_ref=True)} {out_name}({vec_name});
""".split("\n")
)
return code, decl

View File

@ -0,0 +1,220 @@
# Represents all kernels used by an Executorch model.
# It maintains a dict[OperatorName, dict[ETKernelKey, BackendMetadata]] structure.
from __future__ import annotations
import itertools
from collections import defaultdict, namedtuple
from dataclasses import dataclass
from enum import IntEnum
from typing_extensions import assert_never
from torchgen.model import (
BackendIndex,
BackendMetadata,
DispatchKey,
NativeFunction,
NativeFunctionsGroup,
OperatorName,
)
KERNEL_KEY_VERSION = 1
# TODO: this is a duplicated subset of codegen.tool.gen_oplist; remove the declaration in codegen
class ScalarType(IntEnum):
Byte = 0
Char = 1
Short = 2
Int = 3
Long = 4
Float = 6
Double = 7
Bool = 11
ETParsedYaml = namedtuple("ETParsedYaml", ["native_functions", "kernel_index"])
@dataclass(frozen=True)
class ETKernelKeyOpArgMeta:
arg_name: str
dtype: str
# The order of the dimensions if entry is a Tensor
dim_order: tuple[int, ...]
def to_native_string(self) -> str:
dtype_str = ScalarType[self.dtype].value
dim_str = str(self.dim_order)[1:-1].replace(" ", "")
return f"{dtype_str};{dim_str}"
@dataclass(frozen=True)
class ETKernelKey:
# Field undefined is default = True
arg_meta: tuple[ETKernelKeyOpArgMeta, ...] = ()
# Indicator for this kernel being used as a catch all
default: bool = False
version: int = KERNEL_KEY_VERSION
@staticmethod
def gen_from_yaml(
args: dict[str, tuple[str, str]],
type_alias_map: dict[str, list[str]], # TODO: Support unwrapped str val
dim_order_alias_map: dict[str, list[int]],
) -> list[ETKernelKey]:
"""Generate ETKernelKeys from arg kernel specs
Multiple ETKernelKeys may be returned because each potential dtype permutation
from type_alias_map is materialized as its own kernel key.
Args:
args: Mapping from argument name to kernel specs
Kernel specs are a tuple of (dtype, dim_order).
Currently tuple entries must be aliased via the alias map arguments
type_alias_map: Mapping from type alias to potential type enums
i.e { T0 : [Double, Int] } means T0 can be either Double or Int
Used for lookup by args
dim_order_alias_map: Mapping from alias to a list of dimension orders
Used for lookup by args
"""
# Cast dim order values to int
dim_order_alias_map = {
k: [int(alias) for alias in v] for k, v in dim_order_alias_map.items()
}
kernel_keys = []
# Get all used Dtype Alias
dtype_alias_used = set()
for type_alias, dim_order in args.values():
# Enforce usage of alias initially
# TODO: Support inlined arguments
assert type_alias in type_alias_map, "Undefined type alias: " + str(
type_alias
)
assert dim_order in dim_order_alias_map, (
f"Undefined dim_order alias: {dim_order}"
)
dtype_alias_used.add(type_alias)
# Generate all permutations of dtype alias values
alias_dtypes = [
[(alias, dtype) for dtype in type_alias_map[alias]]
for alias in dtype_alias_used
]
alias_permutations = [
dict(permutation) for permutation in list(itertools.product(*alias_dtypes))
]
# Using each alias value permutation, generate kernel keys
op_arg_cache = {}
for permutation in alias_permutations:
arg_list = []
for arg_name, arg_spec in args.items():
dtype = permutation[arg_spec[0]]
dim_order = dim_order_alias_map[arg_spec[1]] # type: ignore[assignment]
if (
cache_key := (arg_name, dtype, tuple(dim_order))
) not in op_arg_cache:
op_arg_cache[cache_key] = ETKernelKeyOpArgMeta(*cache_key) # type: ignore[arg-type]
arg_list.append(op_arg_cache[cache_key])
kernel_keys.append(ETKernelKey(tuple(arg_list)))
return kernel_keys
def to_native_string(self) -> str:
if self.default:
return "default"
return (
"v"
+ str(KERNEL_KEY_VERSION)
+ "/"
+ "|".join([arg.to_native_string() for arg in self.arg_meta])
)
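# Example: a key generated from arg specs {"self": ("T0", "D0"), ...} with
# T0 = [Double] and D0 = [0, 1, 2, 3] renders as
# "v1/7;0,1,2,3|7;0,1,2,3|7;0,1,2,3" (7 is ScalarType.Double), matching the
# et_kernel_metadata strings used in the selective-build tests in this diff.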
@dataclass(frozen=True)
class ETKernelIndex:
index: dict[OperatorName, dict[ETKernelKey, BackendMetadata]]
def has_kernels(self, g: NativeFunction | NativeFunctionsGroup) -> bool:
m = self.get_kernels(g)
return m is not None
def get_kernels(
self, g: NativeFunction | NativeFunctionsGroup
) -> dict[ETKernelKey, BackendMetadata]:
if isinstance(g, NativeFunction):
f = g
elif isinstance(g, NativeFunctionsGroup):
f = g.functional
else:
assert_never(g)
if f.func.name not in self.index:
return {}
return self.index[f.func.name]
@staticmethod
def grow_from_backend_indices(
kernel_index: dict[OperatorName, dict[ETKernelKey, BackendMetadata]],
backend_indices: dict[DispatchKey, dict[OperatorName, BackendMetadata]],
) -> None:
for dk in backend_indices:
index = backend_indices[dk]
for op, backend_metadata in index.items():
if op in kernel_index:
kernel_index[op][ETKernelKey(default=True)] = backend_metadata
else:
kernel_index[op] = {ETKernelKey(default=True): backend_metadata}
@staticmethod
def from_backend_indices(
backend_indices: dict[DispatchKey, dict[OperatorName, BackendMetadata]],
) -> ETKernelIndex:
kernel_index: dict[OperatorName, dict[ETKernelKey, BackendMetadata]] = (
defaultdict(dict)
)
ETKernelIndex.grow_from_backend_indices(kernel_index, backend_indices)
return ETKernelIndex(kernel_index)
def grow(
self, backend_indices: dict[DispatchKey, dict[OperatorName, BackendMetadata]]
) -> ETKernelIndex:
ETKernelIndex.grow_from_backend_indices(self.index, backend_indices)
return self
def _to_backend_index(self) -> BackendIndex:
"""
WARNING: this will be deprecated once all the codegen places know how to handle ETKernelIndex.
"""
index: dict[OperatorName, BackendMetadata] = {}
for op in self.index:
kernel_dict = self.index[op]
assert len(kernel_dict.values()) == 1, (
f"Can't convert ETKernelIndex to BackendIndex because {op} has more than one kernels. Got {kernel_dict}"
)
index[op] = kernel_dict.get(
ETKernelKey(default=True),
BackendMetadata(kernel="", structured=False, cpp_namespace=""),
)
return BackendIndex(
dispatch_key=DispatchKey.CPU,
use_out_as_primary=False,
device_guard=False,
external=False,
index=index,
)
# Note: a duplicate ETKernelKey from index_b will clobber the metadata from index_a
@staticmethod
def merge_indices(index_a: ETKernelIndex, index_b: ETKernelIndex) -> ETKernelIndex:
combined = defaultdict(dict, index_a.index.copy())
for op, entry in index_b.index.items():
for key, metadata in entry.items():
combined[op][key] = metadata
return ETKernelIndex(combined)
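# Merge sketch: if index_a maps op -> {key: meta_a} and index_b maps
# op -> {key: meta_b}, then merge_indices(index_a, index_b).index[op][key]
# is meta_b, because index_b's entries are written last.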

View File

@ -0,0 +1,153 @@
from __future__ import annotations
from collections import defaultdict, namedtuple
from typing import Any
import yaml
from torchgen.executorch.model import ETKernelIndex, ETKernelKey
from torchgen.gen import LineLoader, parse_native_yaml
from torchgen.model import (
BackendMetadata,
DispatchKey,
FunctionSchema,
NativeFunction,
OperatorName,
)
from torchgen.utils import NamespaceHelper
# Parse native_functions.yaml into a sequence of NativeFunctions and ET Backend Indices.
ETParsedYaml = namedtuple("ETParsedYaml", ["native_functions", "et_kernel_indices"])
# Fields in native_functions.yaml used to determine which kernels should be used
ET_FIELDS = ["kernels", "type_alias", "dim_order_alias"]
def parse_from_yaml(ei: dict[str, object]) -> dict[ETKernelKey, BackendMetadata]:
"""Given a loaded yaml representing kernel assignment information, extract the
mapping from `kernel keys` to `BackendMetadata` (the latter representing the kernel instance)
Args:
ei: Dict keys {kernels, type_alias, dim_order_alias}
See ETKernelKey for description of arguments
"""
e = ei.copy()
if (kernels := e.pop("kernels", None)) is None:
return {}
type_alias: dict[str, list[str]] = e.pop("type_alias", {}) # type: ignore[assignment]
dim_order_alias: dict[str, list[str]] = e.pop("dim_order_alias", {}) # type: ignore[assignment]
dim_order_alias.pop("__line__", None)
kernel_mapping: dict[ETKernelKey, BackendMetadata] = {}
for entry in kernels: # type: ignore[attr-defined]
arg_meta = entry.get("arg_meta")
if arg_meta is not None:
arg_meta.pop("__line__")
kernel_name = entry.get("kernel_name")
namespace_helper = NamespaceHelper.from_namespaced_entity(
kernel_name, max_level=3
)
kernel_namespace = namespace_helper.get_cpp_namespace(default="at")
backend_metadata = BackendMetadata(
kernel=namespace_helper.entity_name,
structured=False,
cpp_namespace=(kernel_namespace + "::native"),
)
kernel_keys = (
[ETKernelKey((), default=True)]
if arg_meta is None
else ETKernelKey.gen_from_yaml(arg_meta, type_alias, dim_order_alias) # type: ignore[arg-type]
)
for kernel_key in kernel_keys:
assert kernel_key not in kernel_mapping, (
"Duplicate kernel key: " + str(kernel_key) + " " + str(e)
)
kernel_mapping[kernel_key] = backend_metadata
return kernel_mapping
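# Illustrative yaml consumed by parse_from_yaml (kernel name hypothetical):
#
# kernels:
# - kernel_name: custom::add_out
#
# With no arg_meta, this maps the default kernel key to a BackendMetadata with
# kernel "add_out" and cpp_namespace "custom::native".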
def parse_et_yaml_struct(es: object) -> ETKernelIndex:
"""Given a loaded yaml representing a list of operators, for each op extract the mapping
of `kernel keys` to `BackendMetadata` (the latter representing the kernel instance
that should be used by the kernel key).
"""
indices: dict[OperatorName, dict[ETKernelKey, BackendMetadata]] = {}
for ei in es: # type: ignore[attr-defined]
e = ei.copy()
funcs = e.pop("func")
assert isinstance(funcs, str), f"not a str: {funcs}"
namespace_helper = NamespaceHelper.from_namespaced_entity(
namespaced_entity=funcs, max_level=1
)
opname = FunctionSchema.parse(namespace_helper.entity_name).name
assert opname not in indices, f"Duplicate func found in yaml: {opname}"
if len(index := parse_from_yaml(e)) != 0:
indices[opname] = index
return ETKernelIndex(indices)
def extract_kernel_fields(es: object) -> dict[OperatorName, dict[str, Any]]:
"""Given a loaded yaml representing a list of operators, extract the
kernel key related fields indexed by the operator name.
"""
fields: dict[OperatorName, dict[str, Any]] = defaultdict(dict)
for ei in es: # type: ignore[attr-defined]
funcs = ei.get("func")
assert isinstance(funcs, str), f"not a str: {funcs}"
namespace_helper = NamespaceHelper.from_namespaced_entity(
namespaced_entity=funcs, max_level=1
)
opname = FunctionSchema.parse(namespace_helper.entity_name).name
for field in ET_FIELDS:
if (value := ei.get(field)) is not None:
fields[opname][field] = value
return fields
def parse_et_yaml(
path: str,
tags_yaml_path: str,
ignore_keys: set[DispatchKey] | None = None,
skip_native_fns_gen: bool = False,
) -> tuple[list[NativeFunction], dict[OperatorName, dict[str, Any]]]:
"""Parse native_functions.yaml into NativeFunctions and an Operator Indexed Dict
of fields to persist from native_functions.yaml to functions.yaml
"""
with open(path) as f:
es = yaml.load(f, Loader=LineLoader)
et_kernel = extract_kernel_fields(es)
# Remove ET-specific fields from entries for backward compatibility
strip_et_fields(es)
native_yaml = parse_native_yaml(
path,
tags_yaml_path,
ignore_keys,
skip_native_fns_gen=skip_native_fns_gen,
loaded_yaml=es,
)
return native_yaml.native_functions, et_kernel
def strip_et_fields(es: object) -> None:
"""Given a loaded yaml representing a list of operators,
remove ET-specific fields from every entry for backward compatibility
"""
for entry in es: # type: ignore[attr-defined]
for field in ET_FIELDS:
entry.pop(field, None)

torchgen/gen_executorch.py Normal file (1024 lines)

File diff suppressed because it is too large