Migrate DeviceType to torch/headeronly (#163999)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/163999
Approved by: https://github.com/mikaylagawarecki
Jane Xu
2025-09-30 15:45:41 +00:00
committed by PyTorch MergeBot
parent ff715366aa
commit 7f3dc45300
5 changed files with 187 additions and 98 deletions
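The practical effect of this migration is that DeviceType and its k-prefixed constants can be consumed by code that only includes torch/headeronly headers and never links libtorch. A minimal sketch of what such downstream code might look like; the helper function is illustrative and not part of this PR:

#include <torch/headeronly/core/DeviceType.h>

// Hypothetical header-only helper: no libtorch symbols are needed, only the
// enum and constants defined in torch/headeronly/core/DeviceType.h.
constexpr bool is_supported_device(torch::headeronly::DeviceType d) {
  return d == torch::headeronly::kCPU || d == torch::headeronly::kCUDA;
}

static_assert(is_supported_device(torch::headeronly::kCPU),
              "CPU is handled by this hypothetical helper");
static_assert(!is_supported_device(torch::headeronly::kMPS),
              "MPS is intentionally not handled here");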

View File

@@ -1,100 +1,16 @@
#pragma once
// This is directly synchronized with caffe2/proto/caffe2.proto, but
// doesn't require me to figure out how to get Protobuf headers into
// ATen/core (which would require a lot more build system hacking.)
// If you modify me, keep me synchronized with that file.
#include <c10/macros/Export.h>
#include <cstddef>
#include <cstdint>
#include <functional>
// If you modified DeviceType in caffe2/proto/caffe2.proto, please also sync
// your changes into torch/headeronly/core/DeviceType.h.
#include <torch/headeronly/core/DeviceType.h>
#include <ostream>
#include <string>
namespace c10 {
// This contains all device types that also have a BackendComponent
// and therefore participate in per-backend functionality dispatch keys.
// This is most backends except PrivateUse2 and PrivateUse3
#define C10_FORALL_BACKEND_DEVICE_TYPES(_, extra) \
_(CPU, extra) \
_(CUDA, extra) \
_(HIP, extra) \
_(XLA, extra) \
_(MPS, extra) \
_(IPU, extra) \
_(XPU, extra) \
_(HPU, extra) \
_(VE, extra) \
_(Lazy, extra) \
_(Meta, extra) \
_(MTIA, extra) \
_(PrivateUse1, extra)
enum class DeviceType : int8_t {
CPU = 0,
CUDA = 1, // CUDA.
MKLDNN = 2, // Reserved for explicit MKLDNN
OPENGL = 3, // OpenGL
OPENCL = 4, // OpenCL
IDEEP = 5, // IDEEP.
HIP = 6, // AMD HIP
FPGA = 7, // FPGA
MAIA = 8, // ONNX Runtime / Microsoft
XLA = 9, // XLA / TPU
Vulkan = 10, // Vulkan
Metal = 11, // Metal
XPU = 12, // XPU
MPS = 13, // MPS
Meta = 14, // Meta (tensors with no data)
HPU = 15, // HPU / HABANA
VE = 16, // SX-Aurora / NEC
Lazy = 17, // Lazy Tensors
IPU = 18, // Graphcore IPU
MTIA = 19, // Meta training and inference devices
PrivateUse1 = 20, // PrivateUse1 device
// NB: If you add more devices:
// - Change the implementations of DeviceTypeName and isValidDeviceType
// in DeviceType.cpp
// - Change the number below
COMPILE_TIME_MAX_DEVICE_TYPES = 21,
};
constexpr DeviceType kCPU = DeviceType::CPU;
constexpr DeviceType kCUDA = DeviceType::CUDA;
constexpr DeviceType kHIP = DeviceType::HIP;
constexpr DeviceType kFPGA = DeviceType::FPGA;
constexpr DeviceType kMAIA = DeviceType::MAIA;
constexpr DeviceType kXLA = DeviceType::XLA;
constexpr DeviceType kMPS = DeviceType::MPS;
constexpr DeviceType kMeta = DeviceType::Meta;
constexpr DeviceType kVulkan = DeviceType::Vulkan;
constexpr DeviceType kMetal = DeviceType::Metal;
constexpr DeviceType kXPU = DeviceType::XPU;
constexpr DeviceType kHPU = DeviceType::HPU;
constexpr DeviceType kVE = DeviceType::VE;
constexpr DeviceType kLazy = DeviceType::Lazy;
constexpr DeviceType kIPU = DeviceType::IPU;
constexpr DeviceType kMTIA = DeviceType::MTIA;
constexpr DeviceType kPrivateUse1 = DeviceType::PrivateUse1;
// define explicit int constant
constexpr int COMPILE_TIME_MAX_DEVICE_TYPES =
static_cast<int>(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES);
static_assert(
COMPILE_TIME_MAX_DEVICE_TYPES <= 21,
"Hey! You seem to be adding a lot of new DeviceTypes. The intent was "
"for this constant to reflect the actual number of DeviceTypes we support "
"in PyTorch; it's important that this number is not too large as we "
"use this to allocate stack arrays in some places in our code. If you "
"are indeed just adding the 20th device type, feel free to change "
"the check to 32; but if you are adding some sort of extensible device "
"types registration, please be aware that you are affecting code that "
"this number is small. Try auditing uses of this constant.");
C10_API std::string DeviceTypeName(DeviceType d, bool lower_case = false);
C10_API bool isValidDeviceType(DeviceType d);
@@ -108,15 +24,6 @@ C10_API bool is_privateuse1_backend_registered();
} // namespace c10
namespace std {
template <>
struct hash<c10::DeviceType> {
std::size_t operator()(c10::DeviceType k) const {
return std::hash<int>()(static_cast<int>(k));
}
};
} // namespace std
namespace torch {
// NOLINTNEXTLINE(misc-unused-using-decls)
using c10::DeviceType;
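
The C10_FORALL_BACKEND_DEVICE_TYPES X-macro that moves out of this file (and reappears in torch/headeronly/core/DeviceType.h below) is expanded once per backend that participates in per-backend dispatch keys. A hedged sketch of the usual expansion pattern; the DEVICE_TYPE_ENTRY callback macro is illustrative and not part of the PR:

#include <torch/headeronly/core/DeviceType.h>

#include <array>

// Illustrative callback: each invocation expands to one enumerator.
#define DEVICE_TYPE_ENTRY(name, unused) torch::headeronly::DeviceType::name,

// One array element per backend device type listed by the X-macro.
constexpr std::array backend_device_types = {
    C10_FORALL_BACKEND_DEVICE_TYPES(DEVICE_TYPE_ENTRY, 0)};

#undef DEVICE_TYPE_ENTRY

static_assert(backend_device_types.size() == 13,
              "13 backends are enumerated by C10_FORALL_BACKEND_DEVICE_TYPES");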

View File

@@ -4,6 +4,7 @@ set(AOTI_ABI_CHECK_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_abi_check)
set(AOTI_ABI_CHECK_TEST_SRCS
${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_cast.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_devicetype.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_exception.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_macros.cpp
@@ -27,7 +28,7 @@ add_executable(test_aoti_abi_check
target_compile_definitions(test_aoti_abi_check PRIVATE USE_GTEST)
# WARNING: DO NOT LINK torch!!!
# The purpose is to check if the used aten/c10 headers are writtern in a header-only way
# The purpose is to check if the used aten/c10 headers are written in a header-only way
target_link_libraries(test_aoti_abi_check PRIVATE gtest_main)
target_include_directories(test_aoti_abi_check PRIVATE ${ATen_CPU_INCLUDE})

View File

@@ -0,0 +1,35 @@
#include <gtest/gtest.h>
#include <torch/headeronly/core/DeviceType.h>
TEST(TestDeviceType, TestDeviceType) {
using torch::headeronly::DeviceType;
constexpr DeviceType expected_device_types[] = {
torch::headeronly::kCPU,
torch::headeronly::kCUDA,
DeviceType::MKLDNN,
DeviceType::OPENGL,
DeviceType::OPENCL,
DeviceType::IDEEP,
torch::headeronly::kHIP,
torch::headeronly::kFPGA,
torch::headeronly::kMAIA,
torch::headeronly::kXLA,
torch::headeronly::kVulkan,
torch::headeronly::kMetal,
torch::headeronly::kXPU,
torch::headeronly::kMPS,
torch::headeronly::kMeta,
torch::headeronly::kHPU,
torch::headeronly::kVE,
torch::headeronly::kLazy,
torch::headeronly::kIPU,
torch::headeronly::kMTIA,
torch::headeronly::kPrivateUse1,
};
for (int8_t i = 0; i <
static_cast<int8_t>(torch::headeronly::COMPILE_TIME_MAX_DEVICE_TYPES);
i++) {
EXPECT_EQ(static_cast<DeviceType>(i), expected_device_types[i]);
}
}

View File

@@ -95,6 +95,27 @@ bits4x2
bits8
bits16
# torch/headeronly/core/DeviceType.h
DeviceType
kCPU
kCUDA
kHIP
kFPGA
kMAIA
kXLA
kMPS
kMeta
kVulkan
kMetal
kXPU
kHPU
kVE
kLazy
kIPU
kMTIA
kPrivateUse1
COMPILE_TIME_MAX_DEVICE_TYPES
# torch/headeronly/core/ScalarType.h
NumScalarTypes
ScalarType
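
Each identifier added to this list under torch/headeronly/core/DeviceType.h is expected to resolve through torch::headeronly without linking torch. COMPILE_TIME_MAX_DEVICE_TYPES in particular is documented (see the static_assert in the new header below) as the bound for small per-device-type stack arrays, so that pattern is now available header-only. A small hedged sketch with a hypothetical tally type:

#include <torch/headeronly/core/DeviceType.h>

#include <cstdint>

// Hypothetical per-device-type tally, sized by the header-only constant.
struct DeviceTypeTally {
  int64_t counts[torch::headeronly::COMPILE_TIME_MAX_DEVICE_TYPES] = {};

  void record(torch::headeronly::DeviceType d) {
    // DeviceType values are 0..COMPILE_TIME_MAX_DEVICE_TYPES-1 by construction.
    counts[static_cast<int>(d)] += 1;
  }
};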

View File

@@ -0,0 +1,125 @@
// This is directly synchronized with caffe2/proto/caffe2.proto, but
// doesn't require me to figure out how to get Protobuf headers into
// ATen/core (which would require a lot more build system hacking.)
// If you modify me, keep me synchronized with that file.
#include <torch/headeronly/macros/Export.h>
#include <cstddef>
#include <cstdint>
#include <functional>
namespace c10 {
// This contains all device types that also have a BackendComponent
// and therefore participate in per-backend functionality dispatch keys.
// This is most backends except PrivateUse2 and PrivateUse3
#define C10_FORALL_BACKEND_DEVICE_TYPES(_, extra) \
_(CPU, extra) \
_(CUDA, extra) \
_(HIP, extra) \
_(XLA, extra) \
_(MPS, extra) \
_(IPU, extra) \
_(XPU, extra) \
_(HPU, extra) \
_(VE, extra) \
_(Lazy, extra) \
_(Meta, extra) \
_(MTIA, extra) \
_(PrivateUse1, extra)
enum class DeviceType : int8_t {
CPU = 0,
CUDA = 1, // CUDA.
MKLDNN = 2, // Reserved for explicit MKLDNN
OPENGL = 3, // OpenGL
OPENCL = 4, // OpenCL
IDEEP = 5, // IDEEP.
HIP = 6, // AMD HIP
FPGA = 7, // FPGA
MAIA = 8, // ONNX Runtime / Microsoft
XLA = 9, // XLA / TPU
Vulkan = 10, // Vulkan
Metal = 11, // Metal
XPU = 12, // XPU
MPS = 13, // MPS
Meta = 14, // Meta (tensors with no data)
HPU = 15, // HPU / HABANA
VE = 16, // SX-Aurora / NEC
Lazy = 17, // Lazy Tensors
IPU = 18, // Graphcore IPU
MTIA = 19, // Meta training and inference devices
PrivateUse1 = 20, // PrivateUse1 device
// NB: If you add more devices:
// - Change the implementations of DeviceTypeName and isValidDeviceType
// in c10/core/DeviceType.cpp
// - Change the number below
COMPILE_TIME_MAX_DEVICE_TYPES = 21,
};
constexpr DeviceType kCPU = DeviceType::CPU;
constexpr DeviceType kCUDA = DeviceType::CUDA;
constexpr DeviceType kHIP = DeviceType::HIP;
constexpr DeviceType kFPGA = DeviceType::FPGA;
constexpr DeviceType kMAIA = DeviceType::MAIA;
constexpr DeviceType kXLA = DeviceType::XLA;
constexpr DeviceType kMPS = DeviceType::MPS;
constexpr DeviceType kMeta = DeviceType::Meta;
constexpr DeviceType kVulkan = DeviceType::Vulkan;
constexpr DeviceType kMetal = DeviceType::Metal;
constexpr DeviceType kXPU = DeviceType::XPU;
constexpr DeviceType kHPU = DeviceType::HPU;
constexpr DeviceType kVE = DeviceType::VE;
constexpr DeviceType kLazy = DeviceType::Lazy;
constexpr DeviceType kIPU = DeviceType::IPU;
constexpr DeviceType kMTIA = DeviceType::MTIA;
constexpr DeviceType kPrivateUse1 = DeviceType::PrivateUse1;
// define explicit int constant
constexpr int COMPILE_TIME_MAX_DEVICE_TYPES =
static_cast<int>(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES);
static_assert(
COMPILE_TIME_MAX_DEVICE_TYPES <= 21,
"Hey! You seem to be adding a lot of new DeviceTypes. The intent was "
"for this constant to reflect the actual number of DeviceTypes we support "
"in PyTorch; it's important that this number is not too large as we "
"use this to allocate stack arrays in some places in our code. If you "
"are indeed just adding the 20th device type, feel free to change "
"the check to 32; but if you are adding some sort of extensible device "
"types registration, please be aware that you are affecting code that "
"this number is small. Try auditing uses of this constant.");
} // namespace c10
namespace std {
template <>
struct hash<c10::DeviceType> {
std::size_t operator()(c10::DeviceType k) const {
return std::hash<int>()(static_cast<int>(k));
}
};
} // namespace std
namespace torch::headeronly {
using c10::COMPILE_TIME_MAX_DEVICE_TYPES;
using c10::DeviceType;
using c10::kCPU;
using c10::kCUDA;
using c10::kFPGA;
using c10::kHIP;
using c10::kHPU;
using c10::kIPU;
using c10::kLazy;
using c10::kMAIA;
using c10::kMeta;
using c10::kMetal;
using c10::kMPS;
using c10::kMTIA;
using c10::kPrivateUse1;
using c10::kVE;
using c10::kVulkan;
using c10::kXLA;
using c10::kXPU;
} // namespace torch::headeronly
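
The std::hash<c10::DeviceType> specialization carried into this header, together with the torch::headeronly aliases above, makes it straightforward to key standard containers on DeviceType from header-only code. A brief usage sketch; the map and its contents are illustrative:

#include <torch/headeronly/core/DeviceType.h>

#include <string>
#include <unordered_map>

int main() {
  using torch::headeronly::DeviceType;
  // DeviceType works directly as an unordered_map key via std::hash.
  std::unordered_map<DeviceType, std::string> labels = {
      {torch::headeronly::kCPU, "cpu"},
      {torch::headeronly::kCUDA, "cuda"},
  };
  return labels.count(DeviceType::CPU) == 1 ? 0 : 1;
}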