Add magic TORCH_MAKE_PYBIND_ENUM_FASTER macro (#163527)
See comment on the macro definition. In short, pybind11 3.x added `py::native_enum`, and supporting that new way to bind enums added overhead on the critical path for calling functions that take regular old `py::enum_`s as arguments (for example, `__eq__`).

Differential Revision: [D82873169](https://our.internmc.facebook.com/intern/diff/D82873169/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/163527
Approved by: https://github.com/ezyang
Committed by: PyTorch MergeBot
Parent: d7491fb1c1
Commit: 50c0550f5a
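For illustration, here is a minimal sketch of the usage pattern this commit applies across the tree (the enum, module name, and binding function below are hypothetical, not from the PR): invoke the macro at global scope before binding the enum with py::enum_, and calls that pass the enum as an argument skip the extra py::native_enum lookup.

// Hypothetical enum and binding function, shown only to illustrate the pattern.
#include <torch/csrc/utils/pybind.h>

namespace example {
enum class MyBackend { Default, Fast };
} // namespace example

// At global scope, before any py::enum_<example::MyBackend> binding.
TORCH_MAKE_PYBIND_ENUM_FASTER(example::MyBackend)

void initExampleBindings(PyObject* module) {
  auto m = pybind11::handle(module).cast<pybind11::module>();
  pybind11::enum_<example::MyBackend>(m, "MyBackend")
      .value("Default", example::MyBackend::Default)
      .value("Fast", example::MyBackend::Fast);
  // Functions taking the enum as an argument (including __eq__ on the bound
  // enum) no longer pay the native_enum caster check on every call.
  m.def("uses_fast_backend", [](example::MyBackend b) {
    return b == example::MyBackend::Fast;
  });
}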
@@ -4,6 +4,9 @@
#include <ATen/core/enum_tag.h>

namespace py = pybind11;

TORCH_MAKE_PYBIND_ENUM_FASTER(at::Tag)

namespace torch {
namespace autograd {

void initEnumTag(PyObject* module) {
@@ -143,6 +143,14 @@

namespace py = pybind11;

TORCH_MAKE_PYBIND_ENUM_FASTER(at::native::ConvBackend)
TORCH_MAKE_PYBIND_ENUM_FASTER(sdp::SDPBackend)
TORCH_MAKE_PYBIND_ENUM_FASTER(at::LinalgBackend)
TORCH_MAKE_PYBIND_ENUM_FASTER(at::BlasBackend)
TORCH_MAKE_PYBIND_ENUM_FASTER(at::ROCmFABackend)
TORCH_MAKE_PYBIND_ENUM_FASTER(at::native::BatchNormBackend)
TORCH_MAKE_PYBIND_ENUM_FASTER(at::impl::TorchFunctionDisabledState)

static PyObject* module;

static THPGenerator* THPDefaultCPUGenerator = nullptr;
@@ -44,6 +44,9 @@
using torch::impl::py_context_manager;
using torch::impl::py_context_manager_DEPRECATED;

TORCH_MAKE_PYBIND_ENUM_FASTER(torch::autograd::CreationMeta)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10::DeviceType)

namespace {

struct DisableFuncTorch {
@@ -10,6 +10,11 @@
#include <c10/cuda/CUDAException.h>
#include <c10/cuda/CUDAGuard.h>

TORCH_MAKE_PYBIND_ENUM_FASTER(cudaError_t)
#if !defined(USE_ROCM) && defined(CUDA_VERSION) && CUDA_VERSION < 12000
TORCH_MAKE_PYBIND_ENUM_FASTER(cudaOutputMode_t)
#endif

namespace torch::cuda::shared {

#ifdef USE_ROCM
@@ -13,6 +13,8 @@ using version_tuple = std::tuple<size_t, size_t, size_t>;
#ifdef USE_CUDNN
#include <cudnn.h>

TORCH_MAKE_PYBIND_ENUM_FASTER(cudnnRNNMode_t)

namespace {

version_tuple getCompileVersion() {
@@ -66,6 +66,14 @@

#include <torch/custom_class.h>

TORCH_MAKE_PYBIND_ENUM_FASTER(c10d::BuiltinCommHookType)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10d::DebugLevel)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10d::ReduceOp::RedOpType)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10d::ProcessGroup::BackendType)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10d::OpType)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10d::WorkResult)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10d::ErrorType)

namespace {

#ifdef USE_C10D_NCCL
@@ -19,6 +19,8 @@ static struct PyModuleDef _module =

PYBIND11_MAKE_OPAQUE(std::vector<uint8_t>)

TORCH_MAKE_PYBIND_ENUM_FASTER(FrameAction)

namespace torch::dynamo {

#if IS_PYTHON_3_11_PLUS
@@ -7,6 +7,7 @@
#include <ATen/FunctionalTensorWrapper.h>
#include <ATen/WrapDimUtils.h>
#include <torch/csrc/functorch/init.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/utils/python_raii.h>
#include <torch/python.h>

@@ -24,6 +25,9 @@

// This file contains functorch's Python bindings.

TORCH_MAKE_PYBIND_ENUM_FASTER(at::functorch::TransformType)
TORCH_MAKE_PYBIND_ENUM_FASTER(at::functorch::RandomnessType)

namespace torch::functorch::impl {

using namespace at::functorch;
@@ -121,6 +121,9 @@
#include <tuple>
#include <utility>

TORCH_MAKE_PYBIND_ENUM_FASTER(MobileOptimizerType)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10::SchemaArgType)

namespace torch::jit {

using c10::AliasInfo;
@@ -75,6 +75,9 @@

#include <fmt/format.h>

TORCH_MAKE_PYBIND_ENUM_FASTER(
    torch::jit::logging::LockingLogger::AggregationType)

namespace torch::jit {

using ::c10::Argument;
@@ -17,6 +17,8 @@
#include <torch/csrc/monitor/events.h>
#include <torch/csrc/monitor/python_init.h>

TORCH_MAKE_PYBIND_ENUM_FASTER(torch::monitor::Aggregation)

namespace pybind11::detail {
template <>
struct type_caster<torch::monitor::data_value_t> {
@@ -28,6 +28,11 @@
#include <torch/csrc/jit/passes/onnx/shape_type_inference.h>
#include <torch/csrc/jit/passes/onnx/unpack_quantized_weights.h>
#include <torch/csrc/jit/serialization/export.h>
#include <torch/csrc/utils/pybind.h>

TORCH_MAKE_PYBIND_ENUM_FASTER(ONNX_NAMESPACE::TensorProto_DataType)
TORCH_MAKE_PYBIND_ENUM_FASTER(torch::onnx::OperatorExportTypes)
TORCH_MAKE_PYBIND_ENUM_FASTER(torch::onnx::TrainingMode)

namespace torch::onnx {
@@ -12,6 +12,12 @@
#include <torch/csrc/profiler/standalone/execution_trace_observer.h>
#include <torch/csrc/utils/pybind.h>

TORCH_MAKE_PYBIND_ENUM_FASTER(at::RecordScope)
TORCH_MAKE_PYBIND_ENUM_FASTER(torch::profiler::impl::ProfilerState)
TORCH_MAKE_PYBIND_ENUM_FASTER(torch::profiler::impl::ActiveProfilerType)
TORCH_MAKE_PYBIND_ENUM_FASTER(torch::profiler::impl::ActivityType)
TORCH_MAKE_PYBIND_ENUM_FASTER(torch::profiler::impl::EventType)

struct THPCapturedTraceback {
  PyObject_HEAD
  std::shared_ptr<torch::CapturedTraceback> data;
@@ -29,6 +29,28 @@ PYBIND11_DECLARE_HOLDER_TYPE(T, c10::intrusive_ptr<T>, true)
PYBIND11_DECLARE_HOLDER_TYPE(T, c10::SingletonOrSharedTypePtr<T>)
PYBIND11_DECLARE_HOLDER_TYPE(T, c10::SingletonTypePtr<T>, true)

// pybind11 3.x's type_caster_enum_type handles both py::native_enum
// and py::enum_. py::native_enum is preferred, so it gets checked
// first. We still use lots of py::enum_ because we don't (yet?)
// require pybind11 3.x, and possibly because the difference is
// user-visible. Putting TORCH_MAKE_PYBIND_ENUM_FASTER(T) at global
// scope before using py::enum_<T> will cause pybind function calls
// that pass arguments of type T to go faster (16% at time of writing,
// but they are quite slow currently and the savings is a fixed cost,
// so the percentage may be higher after other optimizations for
// py::enum_ happen).
#ifdef PYBIND11_HAS_NATIVE_ENUM
#define TORCH_MAKE_PYBIND_ENUM_FASTER(T)                                     \
  namespace pybind11::detail {                                               \
  template <>                                                                \
  struct type_caster_enum_type_enabled<T, void> : std::false_type {};       \
  template <>                                                                \
  struct type_caster_enum_type_enabled<const T, void> : std::false_type {}; \
  } // namespace pybind11::detail
#else // PYBIND11_HAS_NATIVE_ENUM
#define TORCH_MAKE_PYBIND_ENUM_FASTER(T)
#endif // PYBIND11_HAS_NATIVE_ENUM

namespace pybind11::detail {

// torch.Tensor <-> at::Tensor conversions (without unwrapping)
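To make the mechanism above concrete, here is a standalone C++ analogue (illustrative names only, not pybind11's actual internals): a caster is gated on a trait that defaults to true for enum types, and specializing that trait to std::false_type for one enum removes the slower path from overload resolution, which is what the type_caster_enum_type_enabled specializations accomplish in pybind11 3.x.

// Standalone analogue of the trait-based opt-out; names are illustrative only.
#include <iostream>
#include <type_traits>

// Defaults to true for enum types, mimicking the "native enum" caster gate.
template <typename T, typename = void>
struct native_enum_caster_enabled : std::is_enum<T> {};

enum class Color { Red, Green };
enum class Speedy { Slow, Fast };

// Analogous to TORCH_MAKE_PYBIND_ENUM_FASTER(Speedy): disable the gated
// (hypothetically slower) caster for this one type.
template <>
struct native_enum_caster_enabled<Speedy, void> : std::false_type {};

// Selected when the trait is enabled (stands in for the native_enum path).
template <typename T>
std::enable_if_t<native_enum_caster_enabled<T>::value, const char*> convert(T) {
  return "native_enum caster (extra lookup)";
}

// Selected when the trait is disabled (stands in for the plain py::enum_ path).
template <typename T>
std::enable_if_t<!native_enum_caster_enabled<T>::value, const char*> convert(T) {
  return "py::enum_ caster (fast path)";
}

int main() {
  std::cout << convert(Color::Red) << '\n';   // extra lookup path
  std::cout << convert(Speedy::Fast) << '\n'; // fast path
}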
@@ -34,6 +34,9 @@

namespace py = pybind11;

TORCH_MAKE_PYBIND_ENUM_FASTER(c10::DispatchKey)
TORCH_MAKE_PYBIND_ENUM_FASTER(c10::impl::TorchDispatchModeKey)

namespace torch::impl::dispatch {

// Global storage for leaked Python filenames to ensure they remain valid