[1/4] Intel GPU Runtime Upstreaming for Device (#116019)

# Motivation
As mentioned in [[RFC] Intel GPU Runtime Upstreaming](https://github.com/pytorch/pytorch/issues/114842), the first runtime component we would like to upstream is `Device`, which contains the device management functions of the Intel GPU runtime. To facilitate code review, we have split the changes into 4 PRs. This PR is the first of the four and covers the changes under `c10`.

# Design
An Intel GPU device is a wrapper around a SYCL device, on which kernels can be executed. In our design, PyTorch maintains a SYCL device pool containing all the GPU devices of the current machine and manages the pool's state; thread safety, including a thread-local current device, is taken into account. The C++ files related to `Device` are placed in the `c10/xpu` folder, and we provide c10 device runtime APIs such as the following (a brief usage sketch follows the list):
  - `c10::xpu::device_count`
  - `c10::xpu::set_device`
  - ...
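
A minimal usage sketch of these APIs, based on the declarations in `c10/xpu/XPUFunctions.h` added by this PR. The `main` wrapper and printing are illustrative only and assume a build with `USE_XPU` enabled:

```cpp
#include <c10/xpu/XPUFunctions.h>
#include <iostream>

int main() {
  // Query the number of Intel GPU devices; warns once and returns 0 if none.
  c10::DeviceIndex count = c10::xpu::device_count();
  if (count == 0) {
    std::cout << "No XPU devices available." << std::endl;
    return 0;
  }

  // Select device 0 for the current thread; the current device index is
  // thread-local, so other threads are unaffected.
  c10::xpu::set_device(0);
  std::cout << "Current device: "
            << static_cast<int>(c10::xpu::current_device()) << std::endl;

  // Inspect the properties of device 0.
  c10::xpu::DeviceProp prop{};
  c10::xpu::get_device_properties(&prop, 0);
  std::cout << prop.name << " reports " << prop.max_compute_units
            << " compute units." << std::endl;

  // Access the underlying SYCL handles when raw SYCL interop is needed.
  sycl::device& raw_device = c10::xpu::get_raw_device(0);
  sycl::context& ctx = c10::xpu::get_device_context();
  (void)raw_device;
  (void)ctx;
  return 0;
}
```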

# Additional Context
In our plan, 4 PRs will be submitted to PyTorch for `Device`:
1. changes under `c10` (this PR)
2. changes under `aten`
3. the Python frontend
4. lazy initialization shared with CUDA

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116019
Approved by: https://github.com/gujinghui, https://github.com/jgong5, https://github.com/EikanWang, https://github.com/malfet
Author: Yu, Guangye
Date: 2024-01-12 07:36:25 +00:00
Committed by: PyTorch MergeBot
Parent: 7dac2f9f2d
Commit: 50049cfaa0
17 changed files with 637 additions and 0 deletions

View File

@ -258,6 +258,8 @@ exclude_patterns = [
'**/*pb.h',
'**/*CUDA*',
'**/cuda/*pp',
'**/*XPU*',
'**/xpu/*pp',
'c10/util/complex_math.h',
'c10/util/complex_utils.h',
'c10/util/flat_hash_map.h',

View File

@ -198,6 +198,9 @@ option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON)
option(USE_ASAN "Use Address+Undefined Sanitizers" OFF)
option(USE_TSAN "Use Thread Sanitizer" OFF)
option(USE_CUDA "Use CUDA" ON)
cmake_dependent_option(
USE_XPU "Use XPU. Only available on Linux." ON
"LINUX" OFF)
cmake_dependent_option(
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
@ -1146,6 +1149,7 @@ if(BUILD_SHARED_LIBS)
COMPONENT dev)
install(FILES
${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake
${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake
${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake
${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake
${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake

View File

@ -8,6 +8,7 @@ cxx_library(
"test/**/*.cpp",
"benchmark/**/*.cpp",
"cuda/**/*.cpp",
"xpu/**/*.cpp",
],
),
deps = [
@ -30,6 +31,7 @@ cxx_library(
"test/**/*.h",
"benchmark/**/*.h",
"cuda/**/*.h",
"xpu/**/*.h",
],
),
exported_linker_flags = [],

View File

@ -145,6 +145,10 @@ if(USE_ROCM)
add_subdirectory(hip)
endif()
if(USE_XPU)
add_subdirectory(xpu)
endif()
# ---[ Installation
# Note: for now, we will put all export path into one single Caffe2Targets group
# to deal with the cmake deployment need. Inside the Caffe2Targets set, the

View File

@ -155,6 +155,7 @@
namespace c10 {}
namespace c10::cuda {}
namespace c10::hip {}
namespace c10::xpu {}
// Since C10 is the core library for caffe2 (and aten), we will simply reroute
// all abstractions defined in c10 to be available in caffe2 as well.

c10/xpu/CMakeLists.txt (new file, 39 lines)
View File

@ -0,0 +1,39 @@
# Build file for the C10 XPU.
#
# C10 XPU is a minimal library, but it does depend on SYCL.
include(../../cmake/public/xpu.cmake)
set(C10_XPU_SRCS
XPUFunctions.cpp
)
set(C10_XPU_HEADERS
XPUDeviceProp.h
XPUFunctions.h
XPUMacros.h
)
add_library(c10_xpu ${C10_XPU_SRCS} ${C10_XPU_HEADERS})
target_compile_options(c10_xpu PRIVATE "-DC10_XPU_BUILD_MAIN_LIB")
# Enable hidden visibility if compiler supports it.
if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
target_compile_options(c10_xpu PRIVATE "-fvisibility=hidden")
endif()
# ---[ Dependency of c10_xpu
target_link_libraries(c10_xpu PUBLIC c10 torch::xpurt)
target_include_directories(
c10_xpu PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:include>
)
add_subdirectory(test)
# ---[ Installation
install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
foreach(file ${C10_XPU_HEADERS})
get_filename_component(dir ${file} DIRECTORY)
install(FILES ${file} DESTINATION include/c10/xpu/${dir})
endforeach()

c10/xpu/XPUDeviceProp.h (new file, 155 lines)
View File

@ -0,0 +1,155 @@
#pragma once
#include <c10/xpu/XPUMacros.h>
#include <sycl/sycl.hpp>
namespace c10::xpu {
#define AT_FORALL_XPU_DEVICE_PROPERTIES(_) \
/* the device name of this SYCL device. */ \
_(name) \
\
/* the device type associated with the device. */ \
_(device_type) \
\
/* the vendor of this SYCL device. */ \
_(vendor) \
\
/* a backend-defined driver version as a std::string. */ \
_(driver_version) \
\
/* the SYCL version as a std::string in the form <major>.<minor> */ \
_(version) \
\
/* true if the SYCL device is available. Otherwise, return false. */ \
_(is_available) \
\
/* the maximum size in bytes of the arguments that can be passed to a \
* kernel. */ \
_(max_parameter_size) \
\
/* the number of parallel compute units available to the device. */ \
_(max_compute_units) \
\
/* the maximum dimensions that specify the global and local work-item IDs \
* used by the data parallel execution model. */ \
_(max_work_item_dimensions) \
\
/* the maximum number of workitems that are permitted in a work-group \
* executing a kernel on a single compute unit. */ \
_(max_work_group_size) \
\
/* the maximum number of subgroups in a work-group for any kernel executed \
* on the device. */ \
_(max_num_sub_groups) \
\
/* a std::vector of size_t containing the set of sub-group sizes supported \
* by the device. */ \
_(sub_group_sizes) \
\
/* the maximum configured clock frequency of this SYCL device in MHz. */ \
_(max_clock_frequency) \
\
/* the default compute device address space size specified as an unsigned \
* integer value in bits. Must return either 32 or 64. */ \
_(address_bits) \
\
/* the maximum size of memory object allocation in bytes. */ \
_(max_mem_alloc_size) \
\
/* the minimum value in bits of the largest supported SYCL built-in data \
* type if this SYCL device is not of device type \
* sycl::info::device_type::custom. */ \
_(mem_base_addr_align) \
\
/* a std::vector of info::fp_config describing the half/single/double \
* precision floating-point capability of this SYCL device. */ \
_(half_fp_config) \
_(single_fp_config) \
_(double_fp_config) \
\
/* the size of global device memory in bytes. */ \
_(global_mem_size) \
\
/* the type of global memory cache supported. */ \
_(global_mem_cache_type) \
\
/* the size of global memory cache in bytes. */ \
_(global_mem_cache_size) \
\
/* the size of global memory cache line in bytes. */ \
_(global_mem_cache_line_size) \
\
/* the type of local memory supported. */ \
_(local_mem_type) \
\
/* the size of local memory arena in bytes. */ \
_(local_mem_size) \
\
/* the maximum number of sub-devices that can be created when this device is \
* partitioned. */ \
_(partition_max_sub_devices) \
\
/* the resolution of device timer in nanoseconds. */ \
_(profiling_timer_resolution) \
\
/* the preferred native vector width size for built-in scalar types that can \
* be put into vectors. */ \
_(preferred_vector_width_char) \
_(preferred_vector_width_short) \
_(preferred_vector_width_int) \
_(preferred_vector_width_long) \
_(preferred_vector_width_float) \
_(preferred_vector_width_double) \
_(preferred_vector_width_half) \
\
/* the native ISA vector width. The vector width is defined as the number of \
* scalar elements that can be stored in the vector. */ \
_(native_vector_width_char) \
_(native_vector_width_short) \
_(native_vector_width_int) \
_(native_vector_width_long) \
_(native_vector_width_float) \
_(native_vector_width_double) \
_(native_vector_width_half)
#define AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(_) \
/* the number of EUs associated with the Intel GPU. */ \
_(gpu_eu_count, 512) \
\
/* the number of EUs in a subslice. */ \
_(gpu_eu_count_per_subslice, 8) \
\
/* the simd width of EU of GPU. */ \
_(gpu_eu_simd_width, 8) \
\
/* the number of hardware threads per EU of GPU. */ \
_(gpu_hw_threads_per_eu, 8)
#define _DEFINE_SYCL_PROP(ns, property, member) \
ns::property::return_type member;
#define DEFINE_DEVICE_PROP(property) \
_DEFINE_SYCL_PROP(sycl::info::device, property, property)
#define DEFINE_PLATFORM_PROP(property, member) \
_DEFINE_SYCL_PROP(sycl::info::platform, property, member)
#define DEFINE_EXT_DEVICE_PROP(property, ...) \
_DEFINE_SYCL_PROP(sycl::ext::intel::info::device, property, property)
struct C10_XPU_API DeviceProp {
AT_FORALL_XPU_DEVICE_PROPERTIES(DEFINE_DEVICE_PROP);
// the platform name.
DEFINE_PLATFORM_PROP(name, platform_name);
AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(DEFINE_EXT_DEVICE_PROP)
};
#undef _DEFINE_SYCL_PROP
#undef DEFINE_DEVICE_PROP
#undef DEFINE_PLATFORM_PROP
#undef DEFINE_EXT_DEVICE_PROP
} // namespace c10::xpu
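
For readers unfamiliar with the X-macro pattern above, here is a rough, illustrative sketch (not part of the PR) of what a few members of the resulting struct look like once the macros are expanded; each entry in the two property lists becomes one member whose type is the corresponding SYCL descriptor's `return_type`:

```cpp
#include <sycl/sycl.hpp>

// Illustrative expansion only -- the real c10::xpu::DeviceProp contains one
// member per AT_FORALL_XPU_DEVICE_PROPERTIES and
// AT_FORALL_XPU_EXT_DEVICE_PROPERTIES entry.
struct DevicePropSketch {
  sycl::info::device::name::return_type name;  // device name (std::string)
  sycl::info::device::max_compute_units::return_type max_compute_units;
  sycl::info::platform::name::return_type platform_name;  // the platform name
  sycl::ext::intel::info::device::gpu_eu_count::return_type gpu_eu_count;
};
```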

c10/xpu/XPUFunctions.cpp (new file, 182 lines)
View File

@ -0,0 +1,182 @@
#include <c10/util/CallOnce.h>
#include <c10/util/Exception.h>
#include <c10/xpu/XPUFunctions.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cmath>
#include <deque>
#include <mutex>
#include <vector>
namespace c10::xpu {
namespace {
/*
* Note [Device Management]
*
* An Intel GPU device qualifies as a type of SYCL device. This classification
* allows for the runtime querying of Intel GPU device information through the
* SYCL runtime library.
*
* Device status is managed through a SYCL device pool, with SYCL devices
* determined at runtime. There's currently a SYCL device pool that is lazily
* created and only initialized once, ensuring thread-local safety. Each device
* within the device pool shares the same default context.
*/
c10::once_flag init_flag;
thread_local DeviceIndex curDeviceIndex = 0;
struct DevicePool {
std::vector<std::unique_ptr<sycl::device>> devices;
std::unique_ptr<sycl::context> context;
} gDevicePool;
void enumDevices(std::vector<std::unique_ptr<sycl::device>>& devices) {
auto platform_list = sycl::platform::get_platforms();
// Enumerate GPU devices from the specified platform.
for (const auto& platform : platform_list) {
if (platform.get_backend() != sycl::backend::ext_oneapi_level_zero) {
continue;
}
auto device_list = platform.get_devices();
for (const auto& device : device_list) {
if (device.is_gpu()) {
devices.push_back(std::make_unique<sycl::device>(device));
}
}
}
}
inline void initGlobalDevicePoolState() {
// Enumerate all GPU devices and record them.
enumDevices(gDevicePool.devices);
if (gDevicePool.devices.empty()) {
TORCH_WARN("XPU device count is zero!");
return;
}
// The default context is utilized for each Intel GPU device, allowing the
// retrieval of the context from any GPU device.
gDevicePool.context = std::make_unique<sycl::context>(
gDevicePool.devices[0]->get_platform().ext_oneapi_get_default_context());
}
inline void initDevicePoolCallOnce() {
c10::call_once(init_flag, initGlobalDevicePoolState);
}
void initDeviceProperties(DeviceProp* device_prop, int device) {
using namespace sycl::info;
using namespace sycl::ext;
// Get raw sycl device associated with device index.
auto& raw_device = *gDevicePool.devices[device];
// Initialize the device properties associated with the specific device.
#define ASSIGN_DEVICE_PROP(property) \
device_prop->property = raw_device.get_info<device::property>();
#define ASSIGN_EXT_DEVICE_PROP(property, default_value) \
device_prop->property = raw_device.has(sycl::aspect::ext_intel_##property) \
? raw_device.get_info<intel::info::device::property>() \
: default_value;
AT_FORALL_XPU_DEVICE_PROPERTIES(ASSIGN_DEVICE_PROP);
device_prop->platform_name =
raw_device.get_info<device::platform>().get_info<platform::name>();
AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(ASSIGN_EXT_DEVICE_PROP);
return;
}
inline void check_device(int device) {
int total = static_cast<int>(gDevicePool.devices.size());
TORCH_CHECK(
device >= 0 && device < total,
"device is out of range, device is ",
device,
", total number of devices is ",
total,
".");
}
} // anonymous namespace
sycl::device& get_raw_device(int device) {
initDevicePoolCallOnce();
check_device(device);
return *gDevicePool.devices[device];
}
sycl::context& get_device_context() {
initDevicePoolCallOnce();
TORCH_CHECK(
gDevicePool.context,
"Device pool initialization failed, you might not have an XPU device.")
return *gDevicePool.context;
}
void get_device_properties(DeviceProp* device_prop, int device) {
initDevicePoolCallOnce();
TORCH_CHECK(device_prop, "device_prop is an invalid pointer.");
check_device(device);
initDeviceProperties(device_prop, device);
}
int get_device_idx_from_pointer(void* ptr) {
initDevicePoolCallOnce();
TORCH_CHECK(ptr, "ptr is an invalid pointer.");
auto type = sycl::get_pointer_type(ptr, get_device_context());
TORCH_CHECK(
type == sycl::usm::alloc::device, "ptr is not a device type pointer.");
sycl::device raw_device = sycl::get_pointer_device(ptr, get_device_context());
auto match_device = [raw_device](const auto& device) -> bool {
return raw_device == *device;
};
auto it = std::find_if(
gDevicePool.devices.begin(), gDevicePool.devices.end(), match_device);
TORCH_CHECK(
it != gDevicePool.devices.end(),
"Cant't find the pointer from XPU devices.");
return static_cast<int>(std::distance(gDevicePool.devices.begin(), it));
}
DeviceIndex device_count() {
initDevicePoolCallOnce();
return static_cast<DeviceIndex>(gDevicePool.devices.size());
}
DeviceIndex device_count_ensure_non_zero() {
auto count = device_count();
// Zero GPUs could produce a warning in `device_count`, but we fail here.
TORCH_CHECK(count, "No XPU devices are available.");
return count;
}
DeviceIndex current_device() {
initDevicePoolCallOnce();
return curDeviceIndex;
}
void set_device(DeviceIndex device) {
initDevicePoolCallOnce();
check_device(static_cast<int>(device));
curDeviceIndex = device;
}
int exchange_device(int to_device) {
auto cur_device = static_cast<int>(current_device());
if (to_device == cur_device) {
return cur_device;
}
set_device(static_cast<DeviceIndex>(to_device));
return cur_device;
}
int maybe_exchange_device(int to_device) {
return exchange_device(to_device);
}
} // namespace c10::xpu
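
The `exchange_device`/`maybe_exchange_device` entry points above return the previous thread-local device, which makes it possible to switch devices temporarily and restore them afterwards. A minimal RAII sketch of that usage (the `ScopedXPUDevice` helper is hypothetical and not part of this PR):

```cpp
#include <c10/xpu/XPUFunctions.h>

// Hypothetical helper (not part of the PR): temporarily switches the
// thread-local current XPU device and restores the previous one on scope exit.
class ScopedXPUDevice {
 public:
  explicit ScopedXPUDevice(int device)
      : prev_device_(c10::xpu::exchange_device(device)) {}
  ~ScopedXPUDevice() {
    c10::xpu::set_device(static_cast<c10::DeviceIndex>(prev_device_));
  }

 private:
  int prev_device_;
};

int main() {
  if (c10::xpu::device_count() < 2) {
    return 0;  // Needs at least two devices to demonstrate switching.
  }
  c10::xpu::set_device(0);
  {
    ScopedXPUDevice guard(1);
    // The thread-local current device is 1 inside this scope.
  }
  // Back to device 0 once the guard is destroyed.
  return 0;
}
```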

c10/xpu/XPUFunctions.h (new file, 33 lines)
View File

@ -0,0 +1,33 @@
#pragma once
#include <c10/core/Device.h>
#include <c10/xpu/XPUDeviceProp.h>
#include <c10/xpu/XPUMacros.h>
// The naming convention used here matches the naming convention of torch.xpu
namespace c10::xpu {
// Log a warning only once if no devices are detected.
C10_XPU_API DeviceIndex device_count();
// Throws an error if no devices are detected.
C10_XPU_API DeviceIndex device_count_ensure_non_zero();
C10_XPU_API DeviceIndex current_device();
C10_XPU_API void set_device(DeviceIndex device);
C10_XPU_API int exchange_device(int device);
C10_XPU_API int maybe_exchange_device(int to_device);
C10_XPU_API sycl::device& get_raw_device(int device);
C10_XPU_API sycl::context& get_device_context();
C10_XPU_API void get_device_properties(DeviceProp* device_prop, int device);
C10_XPU_API int get_device_idx_from_pointer(void* ptr);
} // namespace c10::xpu

c10/xpu/XPUMacros.h (new file, 19 lines)
View File

@ -0,0 +1,19 @@
#pragma once
// See c10/macros/Export.h for a detailed explanation of what the function
// of these macros is. We need one set of macros for every separate library
// we build.
#if defined(__GNUC__)
#define C10_XPU_EXPORT __attribute__((__visibility__("default")))
#else // defined(__GNUC__)
#define C10_XPU_EXPORT
#endif // defined(__GNUC__)
#define C10_XPU_IMPORT C10_XPU_EXPORT
// This one is being used by libc10_xpu.so
#ifdef C10_XPU_BUILD_MAIN_LIB
#define C10_XPU_API C10_XPU_EXPORT
#else
#define C10_XPU_API C10_XPU_IMPORT
#endif

View File

@ -0,0 +1,17 @@
# ---[ Test binaries.
set(C10_XPU_ALL_TEST_FILES
impl/XPUDeviceTest.cpp
)
if(BUILD_TEST)
foreach(test_src ${C10_XPU_ALL_TEST_FILES})
get_filename_component(test_file_name ${test_src} NAME_WE)
set(test_name "c10_xpu_${test_file_name}")
add_executable(${test_name} "${test_src}")
target_link_libraries(${test_name} c10_xpu gtest_main)
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
if(INSTALL_TEST)
install(TARGETS ${test_name} DESTINATION test)
endif()
endforeach()
endif()

View File

@ -0,0 +1,59 @@
#include <gtest/gtest.h>
#include <c10/xpu/XPUFunctions.h>
#define ASSERT_EQ_XPU(X, Y) \
{ \
bool _isEQ = X == Y; \
ASSERT_TRUE(_isEQ); \
}
bool has_xpu() {
return c10::xpu::device_count() > 0;
}
TEST(XPUDeviceTest, DeviceBehavior) {
if (!has_xpu()) {
return;
}
c10::xpu::set_device(0);
ASSERT_EQ_XPU(c10::xpu::current_device(), 0);
if (c10::xpu::device_count() <= 1) {
return;
}
c10::xpu::set_device(1);
ASSERT_EQ_XPU(c10::xpu::current_device(), 1);
ASSERT_EQ_XPU(c10::xpu::exchange_device(0), 1);
ASSERT_EQ_XPU(c10::xpu::current_device(), 0);
}
TEST(XPUDeviceTest, DeviceProperties) {
if (!has_xpu()) {
return;
}
c10::xpu::DeviceProp device_prop{};
c10::xpu::get_device_properties(&device_prop, 0);
ASSERT_TRUE(device_prop.max_compute_units > 0);
ASSERT_TRUE(device_prop.gpu_eu_count > 0);
}
TEST(XPUDeviceTest, PointerGetDevice) {
if (!has_xpu()) {
return;
}
sycl::device& raw_device = c10::xpu::get_raw_device(0);
void* ptr =
sycl::malloc_device(8, raw_device, c10::xpu::get_device_context());
ASSERT_EQ_XPU(c10::xpu::get_device_idx_from_pointer(ptr), 0);
sycl::free(ptr, c10::xpu::get_device_context());
int dummy = 0;
ASSERT_THROW(c10::xpu::get_device_idx_from_pointer(&dummy), c10::Error);
}

View File

@ -93,6 +93,16 @@ if(USE_CUDA)
endif()
endif()
# ---[ XPU
if(USE_XPU)
include(${CMAKE_CURRENT_LIST_DIR}/public/xpu.cmake)
if(NOT PYTORCH_FOUND_XPU)
# message(WARNING "Not compiling with XPU. Could NOT find SYCL."
# "Suppress this warning with -DUSE_XPU=OFF.")
caffe2_update_option(USE_XPU OFF)
endif()
endif()
# ---[ Custom Protobuf
if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_BUILD_MOBILE)
disable_ubsan()

View File

@ -0,0 +1,70 @@
# This will define the following variables:
# SYCL_FOUND : True if the system has the SYCL library.
# SYCL_INCLUDE_DIR : Include directories needed to use SYCL.
# SYCL_LIBRARY_DIR : The path to the SYCL library.
# SYCL_LIBRARY : SYCL library fullname.
include(FindPackageHandleStandardArgs)
set(SYCL_ROOT "")
if(DEFINED ENV{SYCL_ROOT})
set(SYCL_ROOT $ENV{SYCL_ROOT})
elseif(DEFINED ENV{CMPLR_ROOT})
set(SYCL_ROOT $ENV{CMPLR_ROOT})
endif()
string(COMPARE EQUAL "${SYCL_ROOT}" "" nosyclfound)
if(nosyclfound)
set(SYCL_FOUND False)
set(SYCL_REASON_FAILURE "SYCL library not set!!")
set(SYCL_NOT_FOUND_MESSAGE "${SYCL_REASON_FAILURE}")
return()
endif()
# Find include path from binary.
find_file(
SYCL_INCLUDE_DIR
NAMES include
HINTS ${SYCL_ROOT}
NO_DEFAULT_PATH
)
# Find include/sycl path from include path.
find_file(
SYCL_INCLUDE_SYCL_DIR
NAMES sycl
HINTS ${SYCL_ROOT}/include/
NO_DEFAULT_PATH
)
# Other compilers do not recognize the `-fsycl` compilation option, so add the include/sycl directory explicitly.
list(APPEND SYCL_INCLUDE_DIR ${SYCL_INCLUDE_SYCL_DIR})
# Find library directory from binary.
find_file(
SYCL_LIBRARY_DIR
NAMES lib lib64
HINTS ${SYCL_ROOT}
NO_DEFAULT_PATH
)
# Find SYCL library fullname.
find_library(
SYCL_LIBRARY
NAMES sycl
HINTS ${SYCL_LIBRARY_DIR}
NO_DEFAULT_PATH
)
if((NOT SYCL_INCLUDE_DIR) OR (NOT SYCL_LIBRARY_DIR) OR (NOT SYCL_LIBRARY))
set(SYCL_FOUND False)
set(SYCL_REASON_FAILURE "SYCL library is incomplete!!")
set(SYCL_NOT_FOUND_MESSAGE "${SYCL_REASON_FAILURE}")
return()
endif()
find_package_handle_standard_args(
SYCL
FOUND_VAR SYCL_FOUND
REQUIRED_VARS SYCL_INCLUDE_DIR SYCL_LIBRARY_DIR SYCL_LIBRARY
REASON_FAILURE_MESSAGE "${SYCL_REASON_FAILURE}")

View File

@ -113,6 +113,11 @@ function(caffe2_print_configuration_summary)
message(STATUS " TensorRT include path : ${TENSORRT_INCLUDE_DIR}")
endif()
endif()
message(STATUS " USE_XPU : ${USE_XPU}")
if(${USE_XPU})
message(STATUS " SYCL include path : ${SYCL_INCLUDE_DIR}")
message(STATUS " SYCL library : ${SYCL_LIBRARY}")
endif()
message(STATUS " USE_ROCM : ${USE_ROCM}")
if(${USE_ROCM})
message(STATUS " ROCM_VERSION : ${ROCM_VERSION}")

cmake/public/xpu.cmake (new file, 30 lines)
View File

@ -0,0 +1,30 @@
# ---[ xpu
# Poor man's include guard
if(TARGET torch::xpurt)
return()
endif()
# Find SYCL library.
find_package(SYCLToolkit REQUIRED)
if(NOT SYCL_FOUND)
set(PYTORCH_FOUND_XPU FALSE)
return()
endif()
set(PYTORCH_FOUND_XPU TRUE)
# SYCL library interface
add_library(torch::sycl INTERFACE IMPORTED)
set_property(
TARGET torch::sycl PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${SYCL_INCLUDE_DIR})
set_property(
TARGET torch::sycl PROPERTY INTERFACE_LINK_LIBRARIES
${SYCL_LIBRARY})
# xpurt
add_library(torch::xpurt INTERFACE IMPORTED)
set_property(
TARGET torch::xpurt PROPERTY INTERFACE_LINK_LIBRARIES
torch::sycl)

View File

@ -579,6 +579,10 @@ class build_ext(setuptools.command.build_ext.build_ext):
report("-- Detected CUDA at " + cmake_cache_vars["CUDA_TOOLKIT_ROOT_DIR"])
else:
report("-- Not using CUDA")
if cmake_cache_vars["USE_XPU"]:
report("-- Detected XPU runtime at " + cmake_cache_vars["SYCL_LIBRARY_DIR"])
else:
report("-- Not using XPU")
if cmake_cache_vars["USE_MKLDNN"]:
report("-- Using MKLDNN")
if cmake_cache_vars["USE_MKLDNN_ACL"]:
@ -1182,6 +1186,7 @@ def main():
"include/c10/cuda/impl/*.h",
"include/c10/hip/*.h",
"include/c10/hip/impl/*.h",
"include/c10/xpu/*.h",
"include/torch/*.h",
"include/torch/csrc/*.h",
"include/torch/csrc/api/include/torch/*.h",