diff --git a/c10/xpu/XPUDeviceProp.h b/c10/xpu/XPUDeviceProp.h index 591a14f4ad91..085c6367477f 100644 --- a/c10/xpu/XPUDeviceProp.h +++ b/c10/xpu/XPUDeviceProp.h @@ -115,19 +115,22 @@ namespace c10::xpu { #define AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(_) \ /* the number of EUs associated with the Intel GPU. */ \ - _(gpu_eu_count, 512) \ + _(gpu_eu_count, gpu_eu_count, 512) \ \ /* the number of EUs in a subslice. */ \ - _(gpu_eu_count_per_subslice, 8) \ + _(gpu_eu_count_per_subslice, gpu_eu_count_per_subslice, 8) \ \ /* the simd width of EU of GPU. */ \ - _(gpu_eu_simd_width, 8) \ + _(gpu_eu_simd_width, gpu_eu_simd_width, 8) \ \ /* the number of hardware threads per EU of GPU. */ \ - _(gpu_hw_threads_per_eu, 8) \ + _(gpu_hw_threads_per_eu, gpu_hw_threads_per_eu, 8) \ \ /* the device identifier of the Intel GPU, also known as the product ID. */ \ - _(device_id, 0) + _(device_id, device_id, 0) \ + \ + /* the device's Universally Unique Identifier (UUID), 16 bytes. */ \ + _(uuid, device_info_uuid, (std::array{})) #define AT_FORALL_XPU_DEVICE_ASPECT(_) \ /* sycl::half is supported on device. */ \ diff --git a/c10/xpu/XPUFunctions.cpp b/c10/xpu/XPUFunctions.cpp index 5ea7d30e34cf..6947c078483e 100644 --- a/c10/xpu/XPUFunctions.cpp +++ b/c10/xpu/XPUFunctions.cpp @@ -157,9 +157,9 @@ void initDeviceProperties(DeviceProp* device_prop, DeviceIndex device) { #define ASSIGN_DEVICE_PROP(property) \ device_prop->property = raw_device.get_info(); -#define ASSIGN_EXT_DEVICE_PROP(property, default_value) \ - device_prop->property = raw_device.has(sycl::aspect::ext_intel_##property) \ - ? raw_device.get_info() \ +#define ASSIGN_EXT_DEVICE_PROP(property, aspect_tag, default_value) \ + device_prop->property = raw_device.has(sycl::aspect::ext_intel_##aspect_tag) \ + ? 
raw_device.get_info() \ : default_value; #define ASSIGN_DEVICE_ASPECT(member) \ diff --git a/test/test_xpu.py b/test/test_xpu.py index beb5a53a4a6b..04d045b00d8b 100644 --- a/test/test_xpu.py +++ b/test/test_xpu.py @@ -134,6 +134,10 @@ class TestXpu(TestCase): device_properties.architecture, device_capability["architecture"], ) + self.assertEqual( + len(str(device_properties.uuid)), 36 + ) # xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + self.assertEqual(len(device_properties.uuid.bytes), 16) @unittest.skipIf(IS_WINDOWS, "not applicable to Windows (only fails with fork)") def test_wrong_xpu_fork(self): diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in index 566fc56db127..1c05db2cae78 100644 --- a/torch/_C/__init__.pyi.in +++ b/torch/_C/__init__.pyi.in @@ -2389,6 +2389,7 @@ class _XpuDeviceProperties: gpu_subslice_count: _int architecture: _int type: str + uuid: Any # Defined in torch/csrc/xpu/Stream.cpp class _XpuStreamBase(Stream): diff --git a/torch/csrc/cuda/Module.cpp b/torch/csrc/cuda/Module.cpp index b44ce311ecd9..1af168105765 100644 --- a/torch/csrc/cuda/Module.cpp +++ b/torch/csrc/cuda/Module.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -1017,34 +1016,6 @@ PyObject* THCPModule_cudaGetSyncDebugMode(PyObject* self, PyObject* noargs) { END_HANDLE_TH_ERRORS } -std::string uuid_to_string(const char* uuid_bytes) { - // UUIDs are a 128-bit label. CUDA and HIP store this as char[16]. - // For string representation, the code here expands this to - // 8-4-4-4-12 hex format, so each byte becomes 2 hex characters. 
- return fmt::format( - "{:02x}{:02x}{:02x}{:02x}-" - "{:02x}{:02x}-" - "{:02x}{:02x}-" - "{:02x}{:02x}-" - "{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", - (uint8_t)uuid_bytes[0], - (uint8_t)uuid_bytes[1], - (uint8_t)uuid_bytes[2], - (uint8_t)uuid_bytes[3], - (uint8_t)uuid_bytes[4], - (uint8_t)uuid_bytes[5], - (uint8_t)uuid_bytes[6], - (uint8_t)uuid_bytes[7], - (uint8_t)uuid_bytes[8], - (uint8_t)uuid_bytes[9], - (uint8_t)uuid_bytes[10], - (uint8_t)uuid_bytes[11], - (uint8_t)uuid_bytes[12], - (uint8_t)uuid_bytes[13], - (uint8_t)uuid_bytes[14], - (uint8_t)uuid_bytes[15]); -} - //////////////////////////////////////////////////////////////////////////////// // Cuda module initialization //////////////////////////////////////////////////////////////////////////////// diff --git a/torch/csrc/utils.cpp b/torch/csrc/utils.cpp index eee9af9d9ecb..c23a41e8e64e 100644 --- a/torch/csrc/utils.cpp +++ b/torch/csrc/utils.cpp @@ -240,6 +240,34 @@ uint8_t storage_get(const at::Storage& self, ptrdiff_t idx) { return self_t[idx].item(); } +std::string uuid_to_string(const char* uuid_bytes) { + // UUIDs are a 128-bit label. CUDA/HIP and XPU store this as char[16]. + // For string representation, the code here expands this to + // 8-4-4-4-12 hex format, so each byte becomes 2 hex characters. 
+ return fmt::format( + "{:02x}{:02x}{:02x}{:02x}-" + "{:02x}{:02x}-" + "{:02x}{:02x}-" + "{:02x}{:02x}-" + "{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", + (uint8_t)uuid_bytes[0], + (uint8_t)uuid_bytes[1], + (uint8_t)uuid_bytes[2], + (uint8_t)uuid_bytes[3], + (uint8_t)uuid_bytes[4], + (uint8_t)uuid_bytes[5], + (uint8_t)uuid_bytes[6], + (uint8_t)uuid_bytes[7], + (uint8_t)uuid_bytes[8], + (uint8_t)uuid_bytes[9], + (uint8_t)uuid_bytes[10], + (uint8_t)uuid_bytes[11], + (uint8_t)uuid_bytes[12], + (uint8_t)uuid_bytes[13], + (uint8_t)uuid_bytes[14], + (uint8_t)uuid_bytes[15]); +} + template class THPPointer; // NOLINTBEGIN(misc-use-internal-linkage) namespace torch::gdb { diff --git a/torch/csrc/utils.h b/torch/csrc/utils.h index be79adccb74f..71a2b10e5904 100644 --- a/torch/csrc/utils.h +++ b/torch/csrc/utils.h @@ -201,3 +201,5 @@ bool maybeThrowBackCompatKeepdimWarn(char* func); void storage_fill(const at::Storage& self, uint8_t value); void storage_set(const at::Storage& self, ptrdiff_t idx, uint8_t value); uint8_t storage_get(const at::Storage& self, ptrdiff_t idx); + +std::string uuid_to_string(const char* uuid_bytes); diff --git a/torch/csrc/xpu/Module.cpp b/torch/csrc/xpu/Module.cpp index 715bf5b8fb66..d49fc0539a08 100644 --- a/torch/csrc/xpu/Module.cpp +++ b/torch/csrc/xpu/Module.cpp @@ -295,8 +295,23 @@ static void registerXpuDeviceProperties(PyObject* module) { return static_cast(prop.architecture); }; #endif + // Wrapper class for XPU UUID; holds its own copy of the bytes so it cannot dangle + struct XPUuuid { + XPUuuid(const std::array& uuid) : bytes(uuid) {} + const std::array bytes{}; + }; auto m = py::handle(module).cast(); + py::class_(m, "_XPUuuid") + .def_property_readonly( + "bytes", + [](const XPUuuid& uuid) { + return std::vector(uuid.bytes.begin(), uuid.bytes.end()); + }) + .def("__str__", [](const XPUuuid& uuid) { + return uuid_to_string(reinterpret_cast(uuid.bytes.data())); + }); + #define DEFINE_READONLY_MEMBER(member) \ def_readonly(#member, &DeviceProp::member) @@ -328,6 +343,9 @@ static void 
registerXpuDeviceProperties(PyObject* module) { .def_property_readonly("architecture", get_device_architecture) #endif .def_property_readonly("type", get_device_type) + .def_property_readonly( + "uuid", + [](const DeviceProp& prop) -> XPUuuid { return XPUuuid(prop.uuid); }) .def( "__repr__", [&get_device_type, &gpu_subslice_count](const DeviceProp& prop) { @@ -335,7 +353,9 @@ static void registerXpuDeviceProperties(PyObject* module) { stream << "_XpuDeviceProperties(name='" << prop.name << "', platform_name='" << prop.platform_name << "', type='" << get_device_type(prop) << "', device_id=0x" << std::hex - << std::uppercase << prop.device_id << std::dec + << std::uppercase << prop.device_id << std::dec << ", uuid=" + << uuid_to_string( + reinterpret_cast(prop.uuid.data())) << ", driver_version='" << prop.driver_version << "', total_memory=" << prop.global_mem_size / (1024ull * 1024) << "MB" diff --git a/torch/xpu/__init__.py b/torch/xpu/__init__.py index 9a4ade5e71ea..79aae38a3168 100644 --- a/torch/xpu/__init__.py +++ b/torch/xpu/__init__.py @@ -236,15 +236,13 @@ def get_device_capability(device: Optional[_device_t] = None) -> dict[str, Any]: Dict[str, Any]: the xpu capability dictionary of the device """ props = get_device_properties(device) - # pybind service attributes are no longer needed and their presence breaks - # the further logic related to the serialization of the created dictionary. - # In particular it filters out `` - # to fix Triton tests. - # This field appears after updating pybind to 2.13.6. + # Only keep attributes that are safe for dictionary serialization. + serializable_types = (int, float, bool, str, type(None), list, tuple, dict) return { - prop: getattr(props, prop) - for prop in dir(props) - if not prop.startswith(("__", "_pybind11_")) + key: value + for key in dir(props) + if not key.startswith("__") + and isinstance((value := getattr(props, key)), serializable_types) }