mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Revert "[1/4] Intel GPU Runtime Upstreaming for Device (#116019)"
This reverts commit b4cebe2c34242ceee3a1bc285f426662942a29ac. Reverted https://github.com/pytorch/pytorch/pull/116019 on behalf of https://github.com/malfet due to Broke internal and periodic buck builds, see https://github.com/pytorch/pytorch/actions/runs/7414664129/job/20176215868 ([comment](https://github.com/pytorch/pytorch/pull/116019#issuecomment-1879030285))
This commit is contained in:
@ -255,8 +255,6 @@ exclude_patterns = [
|
||||
'**/*pb.h',
|
||||
'**/*CUDA*',
|
||||
'**/cuda/*pp',
|
||||
'**/*XPU*',
|
||||
'**/xpu/*pp',
|
||||
'aten/src/ATen/core/TensorImpl_test.cpp',
|
||||
'third_party/**/*',
|
||||
'torch/csrc/api/**',
|
||||
|
@ -200,9 +200,6 @@ option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON)
|
||||
option(USE_ASAN "Use Address+Undefined Sanitizers" OFF)
|
||||
option(USE_TSAN "Use Thread Sanitizer" OFF)
|
||||
option(USE_CUDA "Use CUDA" ON)
|
||||
cmake_dependent_option(
|
||||
USE_XPU "Use XPU. Only available on Linux." ON
|
||||
"LINUX" OFF)
|
||||
cmake_dependent_option(
|
||||
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
|
||||
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
|
||||
@ -1151,7 +1148,6 @@ if(BUILD_SHARED_LIBS)
|
||||
COMPONENT dev)
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake
|
||||
|
@ -147,10 +147,6 @@ if(USE_ROCM)
|
||||
add_subdirectory(hip)
|
||||
endif()
|
||||
|
||||
if(USE_XPU)
|
||||
add_subdirectory(xpu)
|
||||
endif()
|
||||
|
||||
# ---[ Installation
|
||||
# Note: for now, we will put all export path into one single Caffe2Targets group
|
||||
# to deal with the cmake deployment need. Inside the Caffe2Targets set, the
|
||||
|
@ -155,7 +155,6 @@
|
||||
namespace c10 {}
|
||||
namespace c10::cuda {}
|
||||
namespace c10::hip {}
|
||||
namespace c10::xpu {}
|
||||
|
||||
// Since C10 is the core library for caffe2 (and aten), we will simply reroute
|
||||
// all abstractions defined in c10 to be available in caffe2 as well.
|
||||
|
@ -1,39 +0,0 @@
|
||||
# Build file for the C10 XPU.
|
||||
#
|
||||
# C10 XPU is a minimal library, but it does depend on SYCL.
|
||||
|
||||
include(../../cmake/public/xpu.cmake)
|
||||
|
||||
set(C10_XPU_SRCS
|
||||
XPUFunctions.cpp
|
||||
)
|
||||
set(C10_XPU_HEADERS
|
||||
XPUDeviceProp.h
|
||||
XPUFunctions.h
|
||||
XPUMacros.h
|
||||
)
|
||||
|
||||
add_library(c10_xpu ${C10_XPU_SRCS} ${C10_XPU_HEADERS})
|
||||
target_compile_options(c10_xpu PRIVATE "-DC10_XPU_BUILD_MAIN_LIB")
|
||||
# Enable hidden visibility if compiler supports it.
|
||||
if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
|
||||
target_compile_options(c10_xpu PRIVATE "-fvisibility=hidden")
|
||||
endif()
|
||||
|
||||
# ---[ Dependency of c10_xpu
|
||||
target_link_libraries(c10_xpu PUBLIC c10 torch::xpurt)
|
||||
target_include_directories(
|
||||
c10_xpu PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
|
||||
$<INSTALL_INTERFACE:include>
|
||||
)
|
||||
|
||||
add_subdirectory(test)
|
||||
|
||||
# ---[ Installation
|
||||
install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
|
||||
foreach(file ${C10_XPU_HEADERS})
|
||||
get_filename_component(dir ${file} DIRECTORY)
|
||||
install(FILES ${file} DESTINATION include/c10/xpu/${dir})
|
||||
endforeach()
|
@ -1,155 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/xpu/XPUMacros.h>
|
||||
#include <sycl/sycl.hpp>
|
||||
|
||||
namespace c10::xpu {
|
||||
|
||||
#define AT_FORALL_XPU_DEVICE_PROPERTIES(_) \
|
||||
/* the device name of this SYCL device. */ \
|
||||
_(name) \
|
||||
\
|
||||
/* the device type associated with the device. */ \
|
||||
_(device_type) \
|
||||
\
|
||||
/* the vendor of this SYCL device. */ \
|
||||
_(vendor) \
|
||||
\
|
||||
/* a backend-defined driver version as a std::string. */ \
|
||||
_(driver_version) \
|
||||
\
|
||||
/* the SYCL version as a std::string in the form <major>.<minor> */ \
|
||||
_(version) \
|
||||
\
|
||||
/* true if the SYCL device is available. Otherwise, return false. */ \
|
||||
_(is_available) \
|
||||
\
|
||||
/* the maximum size in bytes of the arguments that can be passed to a \
|
||||
* kernel. */ \
|
||||
_(max_parameter_size) \
|
||||
\
|
||||
/* the number of parallel compute units available to the device. */ \
|
||||
_(max_compute_units) \
|
||||
\
|
||||
/* the maximum dimensions that specify the global and local work-item IDs \
|
||||
* used by the data parallel execution model. */ \
|
||||
_(max_work_item_dimensions) \
|
||||
\
|
||||
/* the maximum number of workitems that are permitted in a work-group \
|
||||
* executing a kernel on a single compute unit. */ \
|
||||
_(max_work_group_size) \
|
||||
\
|
||||
/* the maximum number of subgroups in a work-group for any kernel executed \
|
||||
* on the device. */ \
|
||||
_(max_num_sub_groups) \
|
||||
\
|
||||
/* a std::vector of size_t containing the set of sub-group sizes supported \
|
||||
* by the device. */ \
|
||||
_(sub_group_sizes) \
|
||||
\
|
||||
/* the maximum configured clock frequency of this SYCL device in MHz. */ \
|
||||
_(max_clock_frequency) \
|
||||
\
|
||||
/* the default compute device address space size specified as an unsigned \
|
||||
* integer value in bits. Must return either 32 or 64. */ \
|
||||
_(address_bits) \
|
||||
\
|
||||
/* the maximum size of memory object allocation in bytes. */ \
|
||||
_(max_mem_alloc_size) \
|
||||
\
|
||||
/* the minimum value in bits of the largest supported SYCL built-in data \
|
||||
* type if this SYCL device is not of device type \
|
||||
* sycl::info::device_type::custom. */ \
|
||||
_(mem_base_addr_align) \
|
||||
\
|
||||
/* a std::vector of info::fp_config describing the half/single/double \
|
||||
* precision floating-point capability of this SYCL device. */ \
|
||||
_(half_fp_config) \
|
||||
_(single_fp_config) \
|
||||
_(double_fp_config) \
|
||||
\
|
||||
/* the size of global device memory in bytes. */ \
|
||||
_(global_mem_size) \
|
||||
\
|
||||
/* the type of global memory cache supported. */ \
|
||||
_(global_mem_cache_type) \
|
||||
\
|
||||
/* the size of global memory cache in bytes. */ \
|
||||
_(global_mem_cache_size) \
|
||||
\
|
||||
/* the size of global memory cache line in bytes. */ \
|
||||
_(global_mem_cache_line_size) \
|
||||
\
|
||||
/* the type of local memory supported. */ \
|
||||
_(local_mem_type) \
|
||||
\
|
||||
/* the size of local memory arena in bytes. */ \
|
||||
_(local_mem_size) \
|
||||
\
|
||||
/* the maximum number of sub-devices that can be created when this device is \
|
||||
* partitioned. */ \
|
||||
_(partition_max_sub_devices) \
|
||||
\
|
||||
/* the resolution of device timer in nanoseconds. */ \
|
||||
_(profiling_timer_resolution) \
|
||||
\
|
||||
/* the preferred native vector width size for built-in scalar types that can \
|
||||
* be put into vectors. */ \
|
||||
_(preferred_vector_width_char) \
|
||||
_(preferred_vector_width_short) \
|
||||
_(preferred_vector_width_int) \
|
||||
_(preferred_vector_width_long) \
|
||||
_(preferred_vector_width_float) \
|
||||
_(preferred_vector_width_double) \
|
||||
_(preferred_vector_width_half) \
|
||||
\
|
||||
/* the native ISA vector width. The vector width is defined as the number of \
|
||||
* scalar elements that can be stored in the vector. */ \
|
||||
_(native_vector_width_char) \
|
||||
_(native_vector_width_short) \
|
||||
_(native_vector_width_int) \
|
||||
_(native_vector_width_long) \
|
||||
_(native_vector_width_float) \
|
||||
_(native_vector_width_double) \
|
||||
_(native_vector_width_half)
|
||||
|
||||
#define AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(_) \
|
||||
/* the number of EUs associated with the Intel GPU. */ \
|
||||
_(gpu_eu_count, 512) \
|
||||
\
|
||||
/* the number of EUs in a subslice. */ \
|
||||
_(gpu_eu_count_per_subslice, 8) \
|
||||
\
|
||||
/* the simd width of EU of GPU. */ \
|
||||
_(gpu_eu_simd_width, 8) \
|
||||
\
|
||||
/* the number of hardware threads per EU of GPU. */ \
|
||||
_(gpu_hw_threads_per_eu, 8)
|
||||
|
||||
#define _DEFINE_SYCL_PROP(ns, property, member) \
|
||||
ns::property::return_type member;
|
||||
|
||||
#define DEFINE_DEVICE_PROP(property) \
|
||||
_DEFINE_SYCL_PROP(sycl::info::device, property, property)
|
||||
|
||||
#define DEFINE_PLATFORM_PROP(property, member) \
|
||||
_DEFINE_SYCL_PROP(sycl::info::platform, property, member)
|
||||
|
||||
#define DEFINE_EXT_DEVICE_PROP(property, ...) \
|
||||
_DEFINE_SYCL_PROP(sycl::ext::intel::info::device, property, property)
|
||||
|
||||
struct C10_XPU_API DeviceProp {
|
||||
AT_FORALL_XPU_DEVICE_PROPERTIES(DEFINE_DEVICE_PROP);
|
||||
|
||||
// the platform name.
|
||||
DEFINE_PLATFORM_PROP(name, platform_name);
|
||||
|
||||
AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(DEFINE_EXT_DEVICE_PROP)
|
||||
};
|
||||
|
||||
#undef _DEFINE_SYCL_PROP
|
||||
#undef DEFINE_DEVICE_PROP
|
||||
#undef DEFINE_PLATFORM_PROP
|
||||
#undef DEFINE_EXT_DEVICE_PROP
|
||||
|
||||
} // namespace c10::xpu
|
@ -1,182 +0,0 @@
|
||||
#include <c10/util/CallOnce.h>
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/xpu/XPUFunctions.h>
|
||||
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <cmath>
|
||||
#include <deque>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
namespace c10::xpu {
|
||||
namespace {
|
||||
|
||||
/*
|
||||
* Note [Device Management]
|
||||
*
|
||||
* An Intel GPU device qualifies as a type of SYCL device. This classification
|
||||
* allows for the runtime querying of Intel GPU device information through the
|
||||
* SYCL runtime library.
|
||||
*
|
||||
* Device status is managed through a SYCL device pool, with SYCL devices
|
||||
* determined at runtime. There's currently a SYCL device pool that is lazily
|
||||
* created and only initialized once, ensuring thread-local safety. Each device
|
||||
* within the device pool shares the same default context.
|
||||
*/
|
||||
c10::once_flag init_flag;
|
||||
thread_local DeviceIndex curDeviceIndex = 0;
|
||||
|
||||
struct DevicePool {
|
||||
std::vector<std::unique_ptr<sycl::device>> devices;
|
||||
std::unique_ptr<sycl::context> context;
|
||||
} gDevicePool;
|
||||
|
||||
void enumDevices(std::vector<std::unique_ptr<sycl::device>>& devices) {
|
||||
auto platform_list = sycl::platform::get_platforms();
|
||||
// Enumerated GPU devices from the specific platform.
|
||||
for (const auto& platform : platform_list) {
|
||||
if (platform.get_backend() != sycl::backend::ext_oneapi_level_zero) {
|
||||
continue;
|
||||
}
|
||||
auto device_list = platform.get_devices();
|
||||
for (const auto& device : device_list) {
|
||||
if (device.is_gpu()) {
|
||||
devices.push_back(std::make_unique<sycl::device>(device));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void initGlobalDevicePoolState() {
|
||||
// Enumerate all GPU devices and record them.
|
||||
enumDevices(gDevicePool.devices);
|
||||
if (gDevicePool.devices.empty()) {
|
||||
TORCH_WARN("XPU device count is zero!");
|
||||
return;
|
||||
}
|
||||
|
||||
// The default context is utilized for each Intel GPU device, allowing the
|
||||
// retrieval of the context from any GPU device.
|
||||
gDevicePool.context = std::make_unique<sycl::context>(
|
||||
gDevicePool.devices[0]->get_platform().ext_oneapi_get_default_context());
|
||||
}
|
||||
|
||||
inline void initDevicePoolCallOnce() {
|
||||
c10::call_once(init_flag, initGlobalDevicePoolState);
|
||||
}
|
||||
|
||||
void initDeviceProperties(DeviceProp* device_prop, int device) {
|
||||
using namespace sycl::info;
|
||||
using namespace sycl::ext;
|
||||
// Get raw sycl device associated with device index.
|
||||
auto& raw_device = *gDevicePool.devices[device];
|
||||
|
||||
// Initialize the device properties associated with the specific device.
|
||||
#define ASSIGN_DEVICE_PROP(property) \
|
||||
device_prop->property = raw_device.get_info<device::property>();
|
||||
|
||||
#define ASSIGN_EXT_DEVICE_PROP(property, default_value) \
|
||||
device_prop->property = raw_device.has(sycl::aspect::ext_intel_##property) \
|
||||
? raw_device.get_info<intel::info::device::property>() \
|
||||
: default_value;
|
||||
|
||||
AT_FORALL_XPU_DEVICE_PROPERTIES(ASSIGN_DEVICE_PROP);
|
||||
|
||||
device_prop->platform_name =
|
||||
raw_device.get_info<device::platform>().get_info<platform::name>();
|
||||
|
||||
AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(ASSIGN_EXT_DEVICE_PROP);
|
||||
return;
|
||||
}
|
||||
|
||||
inline void check_device(int device) {
|
||||
int total = static_cast<int>(gDevicePool.devices.size());
|
||||
TORCH_CHECK(
|
||||
device >= 0 && device < total,
|
||||
"device is out of range, device is ",
|
||||
device,
|
||||
", total number of device is ",
|
||||
total,
|
||||
".");
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
sycl::device& get_raw_device(int device) {
|
||||
initDevicePoolCallOnce();
|
||||
check_device(device);
|
||||
return *gDevicePool.devices[device];
|
||||
}
|
||||
|
||||
sycl::context& get_device_context() {
|
||||
initDevicePoolCallOnce();
|
||||
TORCH_CHECK(
|
||||
gDevicePool.context,
|
||||
"Device pool initialization failed, you might not have an XPU device.")
|
||||
return *gDevicePool.context;
|
||||
}
|
||||
|
||||
void get_device_properties(DeviceProp* device_prop, int device) {
|
||||
initDevicePoolCallOnce();
|
||||
TORCH_CHECK(device_prop, "device_prop is an invalid pointer.");
|
||||
check_device(device);
|
||||
initDeviceProperties(device_prop, device);
|
||||
}
|
||||
|
||||
int get_device_idx_from_pointer(void* ptr) {
|
||||
initDevicePoolCallOnce();
|
||||
TORCH_CHECK(ptr, "ptr is an invalid pointer.");
|
||||
auto type = sycl::get_pointer_type(ptr, get_device_context());
|
||||
TORCH_CHECK(
|
||||
type == sycl::usm::alloc::device, "ptr is not a device type pointer.");
|
||||
|
||||
sycl::device raw_device = sycl::get_pointer_device(ptr, get_device_context());
|
||||
auto match_device = [raw_device](const auto& device) -> bool {
|
||||
return raw_device == *device;
|
||||
};
|
||||
auto it = std::find_if(
|
||||
gDevicePool.devices.begin(), gDevicePool.devices.end(), match_device);
|
||||
TORCH_CHECK(
|
||||
it != gDevicePool.devices.end(),
|
||||
"Cant't find the pointer from XPU devices.");
|
||||
return static_cast<int>(std::distance(gDevicePool.devices.begin(), it));
|
||||
}
|
||||
|
||||
DeviceIndex device_count() {
|
||||
initDevicePoolCallOnce();
|
||||
return static_cast<DeviceIndex>(gDevicePool.devices.size());
|
||||
}
|
||||
|
||||
DeviceIndex device_count_ensure_non_zero() {
|
||||
auto count = device_count();
|
||||
// Zero gpus could produce a warning in `device_count` but we fail here.
|
||||
TORCH_CHECK(count, "No XPU devices are available.");
|
||||
return count;
|
||||
}
|
||||
|
||||
DeviceIndex current_device() {
|
||||
initDevicePoolCallOnce();
|
||||
return curDeviceIndex;
|
||||
}
|
||||
|
||||
void set_device(DeviceIndex device) {
|
||||
initDevicePoolCallOnce();
|
||||
check_device(static_cast<int>(device));
|
||||
curDeviceIndex = device;
|
||||
}
|
||||
|
||||
int exchange_device(int to_device) {
|
||||
auto cur_device = static_cast<int>(current_device());
|
||||
if (to_device == cur_device) {
|
||||
return cur_device;
|
||||
}
|
||||
set_device(static_cast<DeviceIndex>(to_device));
|
||||
return cur_device;
|
||||
}
|
||||
|
||||
int maybe_exchange_device(int to_device) {
|
||||
return exchange_device(to_device);
|
||||
}
|
||||
|
||||
} // namespace c10::xpu
|
@ -1,33 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/core/Device.h>
|
||||
#include <c10/xpu/XPUDeviceProp.h>
|
||||
#include <c10/xpu/XPUMacros.h>
|
||||
|
||||
// The naming convention used here matches the naming convention of torch.xpu
|
||||
|
||||
namespace c10::xpu {
|
||||
|
||||
// Log a warning only once if no devices are detected.
|
||||
C10_XPU_API DeviceIndex device_count();
|
||||
|
||||
// Throws an error if no devices are detected.
|
||||
C10_XPU_API DeviceIndex device_count_ensure_non_zero();
|
||||
|
||||
C10_XPU_API DeviceIndex current_device();
|
||||
|
||||
C10_XPU_API void set_device(DeviceIndex device);
|
||||
|
||||
C10_XPU_API int exchange_device(int device);
|
||||
|
||||
C10_XPU_API int maybe_exchange_device(int to_device);
|
||||
|
||||
C10_XPU_API sycl::device& get_raw_device(int device);
|
||||
|
||||
C10_XPU_API sycl::context& get_device_context();
|
||||
|
||||
C10_XPU_API void get_device_properties(DeviceProp* device_prop, int device);
|
||||
|
||||
C10_XPU_API int get_device_idx_from_pointer(void* ptr);
|
||||
|
||||
} // namespace c10::xpu
|
@ -1,19 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
// See c10/macros/Export.h for a detailed explanation of what the function
|
||||
// of these macros are. We need one set of macros for every separate library
|
||||
// we build.
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define C10_XPU_EXPORT __attribute__((__visibility__("default")))
|
||||
#else // defined(__GNUC__)
|
||||
#define C10_XPU_EXPORT
|
||||
#endif // defined(__GNUC__)
|
||||
#define C10_XPU_IMPORT C10_XPU_EXPORT
|
||||
|
||||
// This one is being used by libc10_xpu.so
|
||||
#ifdef C10_XPU_BUILD_MAIN_LIB
|
||||
#define C10_XPU_API C10_XPU_EXPORT
|
||||
#else
|
||||
#define C10_XPU_API C10_XPU_IMPORT
|
||||
#endif
|
@ -1,17 +0,0 @@
|
||||
# ---[ Test binaries.
|
||||
|
||||
set(C10_XPU_ALL_TEST_FILES
|
||||
impl/XPUDeviceTest.cpp
|
||||
)
|
||||
if(BUILD_TEST)
|
||||
foreach(test_src ${C10_XPU_ALL_TEST_FILES})
|
||||
get_filename_component(test_file_name ${test_src} NAME_WE)
|
||||
set(test_name "c10_xpu_${test_file_name}")
|
||||
add_executable(${test_name} "${test_src}")
|
||||
target_link_libraries(${test_name} c10_xpu gtest_main)
|
||||
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
|
||||
if(INSTALL_TEST)
|
||||
install(TARGETS ${test_name} DESTINATION test)
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
@ -1,59 +0,0 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <c10/xpu/XPUFunctions.h>
|
||||
|
||||
#define ASSERT_EQ_XPU(X, Y) \
|
||||
{ \
|
||||
bool _isEQ = X == Y; \
|
||||
ASSERT_TRUE(_isEQ); \
|
||||
}
|
||||
|
||||
bool has_xpu() {
|
||||
return c10::xpu::device_count() > 0;
|
||||
}
|
||||
|
||||
TEST(XPUDeviceTest, DeviceBehavior) {
|
||||
if (!has_xpu()) {
|
||||
return;
|
||||
}
|
||||
|
||||
c10::xpu::set_device(0);
|
||||
ASSERT_EQ_XPU(c10::xpu::current_device(), 0);
|
||||
|
||||
if (c10::xpu::device_count() <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
c10::xpu::set_device(1);
|
||||
ASSERT_EQ_XPU(c10::xpu::current_device(), 1);
|
||||
ASSERT_EQ_XPU(c10::xpu::exchange_device(0), 1);
|
||||
ASSERT_EQ_XPU(c10::xpu::current_device(), 0);
|
||||
}
|
||||
|
||||
TEST(XPUDeviceTest, DeviceProperties) {
|
||||
if (!has_xpu()) {
|
||||
return;
|
||||
}
|
||||
|
||||
c10::xpu::DeviceProp device_prop{};
|
||||
c10::xpu::get_device_properties(&device_prop, 0);
|
||||
|
||||
ASSERT_TRUE(device_prop.max_compute_units > 0);
|
||||
ASSERT_TRUE(device_prop.gpu_eu_count > 0);
|
||||
}
|
||||
|
||||
TEST(XPUDeviceTest, PointerGetDevice) {
|
||||
if (!has_xpu()) {
|
||||
return;
|
||||
}
|
||||
|
||||
sycl::device& raw_device = c10::xpu::get_raw_device(0);
|
||||
void* ptr =
|
||||
sycl::malloc_device(8, raw_device, c10::xpu::get_device_context());
|
||||
|
||||
ASSERT_EQ_XPU(c10::xpu::get_device_idx_from_pointer(ptr), 0);
|
||||
sycl::free(ptr, c10::xpu::get_device_context());
|
||||
|
||||
int dummy = 0;
|
||||
ASSERT_THROW(c10::xpu::get_device_idx_from_pointer(&dummy), c10::Error);
|
||||
}
|
@ -93,16 +93,6 @@ if(USE_CUDA)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ---[ XPU
|
||||
if(USE_XPU)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/public/xpu.cmake)
|
||||
if(NOT PYTORCH_FOUND_XPU)
|
||||
# message(WARNING "Not compiling with XPU. Could NOT find SYCL."
|
||||
# "Suppress this warning with -DUSE_XPU=OFF.")
|
||||
caffe2_update_option(USE_XPU OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ---[ Custom Protobuf
|
||||
if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_BUILD_MOBILE)
|
||||
disable_ubsan()
|
||||
|
@ -1,70 +0,0 @@
|
||||
# This will define the following variables:
|
||||
# SYCL_FOUND : True if the system has the SYCL library.
|
||||
# SYCL_INCLUDE_DIR : Include directories needed to use SYCL.
|
||||
# SYCL_LIBRARY_DIR :The path to the SYCL library.
|
||||
# SYCL_LIBRARY : SYCL library fullname.
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
set(SYCL_ROOT "")
|
||||
if(DEFINED ENV{SYCL_ROOT})
|
||||
set(SYCL_ROOT $ENV{SYCL_ROOT})
|
||||
elseif(DEFINED ENV{CMPLR_ROOT})
|
||||
set(SYCL_ROOT $ENV{CMPLR_ROOT})
|
||||
endif()
|
||||
|
||||
string(COMPARE EQUAL "${SYCL_ROOT}" "" nosyclfound)
|
||||
if(nosyclfound)
|
||||
set(SYCL_FOUND False)
|
||||
set(SYCL_REASON_FAILURE "SYCL library not set!!")
|
||||
set(SYCL_NOT_FOUND_MESSAGE "${SYCL_REASON_FAILURE}")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Find include path from binary.
|
||||
find_file(
|
||||
SYCL_INCLUDE_DIR
|
||||
NAMES include
|
||||
HINTS ${SYCL_ROOT}
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
|
||||
# Find include/sycl path from include path.
|
||||
find_file(
|
||||
SYCL_INCLUDE_SYCL_DIR
|
||||
NAMES sycl
|
||||
HINTS ${SYCL_ROOT}/include/
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
|
||||
# Due to the unrecognized compilation option `-fsycl` in other compiler.
|
||||
list(APPEND SYCL_INCLUDE_DIR ${SYCL_INCLUDE_SYCL_DIR})
|
||||
|
||||
# Find library directory from binary.
|
||||
find_file(
|
||||
SYCL_LIBRARY_DIR
|
||||
NAMES lib lib64
|
||||
HINTS ${SYCL_ROOT}
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
|
||||
# Find SYCL library fullname.
|
||||
find_library(
|
||||
SYCL_LIBRARY
|
||||
NAMES sycl
|
||||
HINTS ${SYCL_LIBRARY_DIR}
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
|
||||
if((NOT SYCL_INCLUDE_DIR) OR (NOT SYCL_LIBRARY_DIR) OR (NOT SYCL_LIBRARY))
|
||||
set(SYCL_FOUND False)
|
||||
set(SYCL_REASON_FAILURE "SYCL library is incomplete!!")
|
||||
set(SYCL_NOT_FOUND_MESSAGE "${SYCL_REASON_FAILURE}")
|
||||
return()
|
||||
endif()
|
||||
|
||||
find_package_handle_standard_args(
|
||||
SYCL
|
||||
FOUND_VAR SYCL_FOUND
|
||||
REQUIRED_VARS SYCL_INCLUDE_DIR SYCL_LIBRARY_DIR SYCL_LIBRARY
|
||||
REASON_FAILURE_MESSAGE "${SYCL_REASON_FAILURE}")
|
@ -114,11 +114,6 @@ function(caffe2_print_configuration_summary)
|
||||
message(STATUS " TensorRT include path : ${TENSORRT_INCLUDE_DIR}")
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS " USE_XPU : ${USE_XPU}")
|
||||
if(${USE_XPU})
|
||||
message(STATUS " SYCL include path : ${SYCL_INCLUDE_DIR}")
|
||||
message(STATUS " SYCL library : ${SYCL_LIBRARY}")
|
||||
endif()
|
||||
message(STATUS " USE_ROCM : ${USE_ROCM}")
|
||||
if(${USE_ROCM})
|
||||
message(STATUS " ROCM_VERSION : ${ROCM_VERSION}")
|
||||
|
@ -1,30 +0,0 @@
|
||||
# ---[ xpu
|
||||
|
||||
# Poor man's include guard
|
||||
if(TARGET torch::xpurt)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Find SYCL library.
|
||||
find_package(SYCLToolkit REQUIRED)
|
||||
if(NOT SYCL_FOUND)
|
||||
set(PYTORCH_FOUND_XPU FALSE)
|
||||
return()
|
||||
endif()
|
||||
set(PYTORCH_FOUND_XPU TRUE)
|
||||
|
||||
# SYCL library interface
|
||||
add_library(torch::sycl INTERFACE IMPORTED)
|
||||
|
||||
set_property(
|
||||
TARGET torch::sycl PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${SYCL_INCLUDE_DIR})
|
||||
set_property(
|
||||
TARGET torch::sycl PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
${SYCL_LIBRARY})
|
||||
|
||||
# xpurt
|
||||
add_library(torch::xpurt INTERFACE IMPORTED)
|
||||
set_property(
|
||||
TARGET torch::xpurt PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
torch::sycl)
|
5
setup.py
5
setup.py
@ -579,10 +579,6 @@ class build_ext(setuptools.command.build_ext.build_ext):
|
||||
report("-- Detected CUDA at " + cmake_cache_vars["CUDA_TOOLKIT_ROOT_DIR"])
|
||||
else:
|
||||
report("-- Not using CUDA")
|
||||
if cmake_cache_vars["USE_XPU"]:
|
||||
report("-- Detected XPU runtime at " + cmake_cache_vars["SYCL_LIBRARY_DIR"])
|
||||
else:
|
||||
report("-- Not using XPU")
|
||||
if cmake_cache_vars["USE_MKLDNN"]:
|
||||
report("-- Using MKLDNN")
|
||||
if cmake_cache_vars["USE_MKLDNN_ACL"]:
|
||||
@ -1187,7 +1183,6 @@ def main():
|
||||
"include/c10/cuda/impl/*.h",
|
||||
"include/c10/hip/*.h",
|
||||
"include/c10/hip/impl/*.h",
|
||||
"include/c10/xpu/*.h",
|
||||
"include/torch/*.h",
|
||||
"include/torch/csrc/*.h",
|
||||
"include/torch/csrc/api/include/torch/*.h",
|
||||
|
Reference in New Issue
Block a user