Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[XROS][ML] System specific adjustments for UTs to work. (#65245)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/65245

Building and running c10 and qnnpack tests on XROS. Notable changes:
- Adding #if defined(__XROS__) guards in a few places not supported by XROS
- Changing ThreadPool to an abstract class

ghstack-source-id: 139513579

Test Plan: Run c10 and qnnpack tests on XROS.

Reviewed By: veselinp, iseeyuan

Differential Revision: D30137333

fbshipit-source-id: bb6239b935187fac712834341fe5a8d3377762b1
Committed by: Facebook GitHub Bot
Parent: 363ccb257d
Commit: eb3b9fe719
@@ -43,7 +43,7 @@ namespace detail {
  * Note this is a legacy method (from THRandom.cpp)
  * FIXME: use std::random_device with entropy information
  */
-#ifndef _WIN32
+#if !defined(_WIN32) && !defined(__XROS__)
 static uint64_t readURandomLong() {
   int randDev = open("/dev/urandom", O_RDONLY);
   // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@@ -56,7 +56,7 @@ static uint64_t readURandomLong() {
   close(randDev);
   return randValue;
 }
-#endif // _WIN32
+#endif // _WIN32 && __XROS__
 
 /**
  * Gets a non deterministic random number number from either the
@@ -82,6 +82,9 @@ uint64_t getNonDeterministicRandom(bool is_cuda) {
     s = (uint64_t)std::chrono::high_resolution_clock::now()
             .time_since_epoch()
             .count();
+#elif defined(__XROS__)
+    std::random_device rd;
+    s = ((((uint64_t)rd()) << 32) + rd()) & 0x1FFFFFFFFFFFFF;
 #elif defined(__SGX_ENABLED__)
     TORCH_CHECK(
         sgx_read_rand(reinterpret_cast<uint8_t*>(&s), sizeof(s)) == SGX_SUCCESS,
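For reference, 0x1FFFFFFFFFFFFF is 2^53 - 1, so the new __XROS__ branch keeps the seed within 53 bits (presumably so it stays exactly representable as a double). Below is a minimal standalone sketch of just that expression; it is illustrative only and not code from this commit.

// Sketch: combine two 32-bit draws from std::random_device into a
// 64-bit value and keep only the low 53 bits, as in the hunk above.
#include <cstdint>
#include <iostream>
#include <random>

int main() {
  std::random_device rd;
  uint64_t s = ((((uint64_t)rd()) << 32) + rd()) & 0x1FFFFFFFFFFFFF;
  std::cout << "seed: " << s << "\n";
  return 0;
}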
@@ -314,7 +314,7 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
 // CUDA_KERNEL_ASSERT checks the assertion
 // even when NDEBUG is defined. This is useful for important assertions in CUDA
 // code that would otherwise be suppressed when building Release.
-#if defined(__ANDROID__) || defined(__APPLE__) || \
+#if defined(__ANDROID__) || defined(__APPLE__) || defined(__XROS__) || \
     (defined(USE_ROCM) && ROCM_VERSION < 40100)
 // Those platforms do not support assert()
 #define CUDA_KERNEL_ASSERT(cond)
@@ -209,7 +209,9 @@ bool InitCaffeLogging(int* argc, char** argv) {
     ::google::InitGoogleLogging(argv[0]);
 #if !defined(_MSC_VER)
     // This is never defined on Windows
+#if !defined(__XROS__)
     ::google::InstallFailureSignalHandler();
+#endif
 #endif
   }
   UpdateLoggingLevelsFromFlags();
@@ -4,7 +4,8 @@
 #include <functional>
 #include <memory>
 
-#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__)
+#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__) || \
+    defined(__XROS__)
 #define HAS_DEMANGLE 0
 #elif defined(__APPLE__) && \
     (TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE)
@@ -20,6 +20,26 @@ C10_DEFINE_int(pthreadpool_size, 0, "Override the default thread pool size.");
 
 namespace caffe2 {
 
+namespace {
+class ThreadPoolImpl : public ThreadPool {
+ public:
+  explicit ThreadPoolImpl(int numThreads);
+  ~ThreadPoolImpl() override;
+
+  // Returns the number of threads currently in use
+  int getNumThreads() const override;
+  void setNumThreads(size_t numThreads) override;
+
+  void run(const std::function<void(int, size_t)>& fn, size_t range) override;
+  void withPool(const std::function<void(WorkersPool*)>& f) override;
+
+ private:
+  std::atomic_size_t numThreads_;
+  std::shared_ptr<WorkersPool> workersPool_;
+  std::vector<std::shared_ptr<Task>> tasks_;
+};
+}
+
 size_t getDefaultNumThreads() {
   CAFFE_ENFORCE(cpuinfo_initialize(), "cpuinfo initialization failed");
   int numThreads = cpuinfo_get_processors_count();
@@ -89,43 +109,40 @@ constexpr size_t kDefaultMinWorkSize = 1;
 
 size_t ThreadPool::defaultNumThreads_ = 0;
 
+ThreadPool* ThreadPool::createThreadPool(int numThreads) {
+  return new ThreadPoolImpl(numThreads);
+}
+
 std::unique_ptr<ThreadPool> ThreadPool::defaultThreadPool() {
   defaultNumThreads_ = getDefaultNumThreads();
   LOG(INFO) << "Constructing thread pool with " << defaultNumThreads_
             << " threads";
-  return std::make_unique<ThreadPool>(defaultNumThreads_);
+  return std::make_unique<ThreadPoolImpl>(defaultNumThreads_);
 }
 
-ThreadPool::ThreadPool(int numThreads)
-    : minWorkSize_(kDefaultMinWorkSize),
-      numThreads_(numThreads),
-      workersPool_(std::make_shared<WorkersPool>()) {}
+ThreadPoolImpl::ThreadPoolImpl(int numThreads)
+    : numThreads_(numThreads),
+      workersPool_(std::make_shared<WorkersPool>()) {
+  minWorkSize_ = kDefaultMinWorkSize;
+}
 
 // NOLINTNEXTLINE(modernize-use-equals-default)
-ThreadPool::~ThreadPool() {}
+ThreadPoolImpl::~ThreadPoolImpl() {}
 
-int ThreadPool::getNumThreads() const {
+int ThreadPoolImpl::getNumThreads() const {
   return numThreads_;
 }
 
 // Sets the number of threads
 // # of threads should not be bigger than the number of big cores
-void ThreadPool::setNumThreads(size_t numThreads) {
+void ThreadPoolImpl::setNumThreads(size_t numThreads) {
   if (defaultNumThreads_ == 0) {
     defaultNumThreads_ = getDefaultNumThreads();
   }
   numThreads_ = std::min(numThreads, defaultNumThreads_);
 }
 
-// Sets the minimum work size (range) for which to invoke the
-// threadpool; work sizes smaller than this will just be run on the
-// main (calling) thread
-void ThreadPool::setMinWorkSize(size_t size) {
-  std::lock_guard<std::mutex> guard(executionMutex_);
-  minWorkSize_ = size;
-}
-
-void ThreadPool::run(const std::function<void(int, size_t)>& fn, size_t range) {
+void ThreadPoolImpl::run(const std::function<void(int, size_t)>& fn, size_t range) {
   const auto numThreads = numThreads_.load(std::memory_order_relaxed);
 
   std::lock_guard<std::mutex> guard(executionMutex_);
@@ -183,7 +200,7 @@ void ThreadPool::run(const std::function<void(int, size_t)>& fn, size_t range) {
   workersPool_->Execute(tasks_);
 }
 
-void ThreadPool::withPool(const std::function<void(WorkersPool*)>& f) {
+void ThreadPoolImpl::withPool(const std::function<void(WorkersPool*)>& f) {
   std::lock_guard<std::mutex> guard(executionMutex_);
   f(workersPool_.get());
 }
@@ -32,33 +32,34 @@ constexpr size_t kCacheLineSize = 64;
 // TORCH_API and alignas annotations at the same time.
 class TORCH_API /*alignas(kCacheLineSize)*/ ThreadPool {
  public:
+  static ThreadPool* createThreadPool(int numThreads);
   static std::unique_ptr<ThreadPool> defaultThreadPool();
-  ThreadPool(int numThreads);
-  ~ThreadPool();
+  virtual ~ThreadPool() = default;
   // Returns the number of threads currently in use
-  int getNumThreads() const;
-  void setNumThreads(size_t numThreads);
+  virtual int getNumThreads() const = 0;
+  virtual void setNumThreads(size_t numThreads) = 0;
 
   // Sets the minimum work size (range) for which to invoke the
   // threadpool; work sizes smaller than this will just be run on the
   // main (calling) thread
-  void setMinWorkSize(size_t size);
+  void setMinWorkSize(size_t size) {
+    std::lock_guard<std::mutex> guard(executionMutex_);
+    minWorkSize_ = size;
+  }
 
   size_t getMinWorkSize() const {
     return minWorkSize_;
   }
-  void run(const std::function<void(int, size_t)>& fn, size_t range);
+  virtual void run(const std::function<void(int, size_t)>& fn, size_t range) = 0;
 
   // Run an arbitrary function in a thread-safe manner accessing the Workers
   // Pool
-  void withPool(const std::function<void(WorkersPool*)>& fn);
+  virtual void withPool(const std::function<void(WorkersPool*)>& fn) = 0;
 
- private:
+ protected:
   static size_t defaultNumThreads_;
   mutable std::mutex executionMutex_;
   size_t minWorkSize_;
   std::atomic_size_t numThreads_;
   std::shared_ptr<WorkersPool> workersPool_;
   std::vector<std::shared_ptr<Task>> tasks_;
 };
 
 } // namespace caffe2
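A minimal caller-side sketch of the abstract interface above, not part of this commit: parallelFill and all numbers are made up for illustration, and it assumes the caffe2 threadpool header is available.

#include "caffe2/utils/threadpool/ThreadPool.h"

#include <cstddef>
#include <memory>
#include <vector>

void parallelFill(std::vector<float>& data) {
  // createThreadPool() returns the concrete ThreadPoolImpl behind the
  // abstract interface; the virtual destructor makes it safe to delete
  // through the base pointer held by unique_ptr.
  std::unique_ptr<caffe2::ThreadPool> pool(
      caffe2::ThreadPool::createThreadPool(/*numThreads=*/4));

  // Work ranges smaller than this run directly on the calling thread.
  pool->setMinWorkSize(64);

  // run() invokes the callback once per index in [0, range); this sketch
  // assumes the int argument is a worker id and the size_t argument the
  // item index, per the std::function<void(int, size_t)> signature.
  pool->run(
      [&data](int /*workerId*/, size_t i) {
        data[i] = static_cast<float>(i);
      },
      data.size());
}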
@@ -83,7 +83,7 @@ size_t getDefaultNumThreads();
 PThreadPool* pthreadpool() {
   static auto threadpool =
       std::make_unique<PThreadPool>(getDefaultNumThreads());
-#ifndef WIN32
+#if !(defined(WIN32)) && !(defined(__XROS__))
   static std::once_flag flag;
   std::call_once(flag, []() {
     pthread_atfork(nullptr, nullptr, child_atfork);
@@ -8,7 +8,7 @@
 #include <stddef.h> // for size_t
 #include <stdint.h> // for uint32_t
 
-#ifdef USE_PTHREADPOOL
+#if defined(USE_PTHREADPOOL) && !(defined(__XROS__))
 // This is a hack.
 // Mainly introduced here because
 // 1. NNPACK can be compiled to use internal legacy threadpool implementation because much of C2 depends on that.
@@ -2,7 +2,7 @@
 #include "caffe2/utils/threadpool/pthreadpool-cpp.h"
 #include "caffe2/utils/threadpool/ThreadPool.h"
 
-#ifdef USE_PTHREADPOOL
+#if defined(USE_PTHREADPOOL) && !(defined(__XROS__))
 namespace caffe2 {
 namespace {
 static thread_local bool using_new_threadpool{false};
@@ -34,7 +34,7 @@ void legacy_pthreadpool_compute_1d(
     }
     return;
   }
-#ifdef USE_PTHREADPOOL
+#if defined(USE_PTHREADPOOL) && !(defined(__XROS__))
   if (caffe2::using_new_threadpool) {
     pthreadpool_parallelize_1d(threadpool, function, argument, range, 0u);
   } else {
@@ -76,7 +76,7 @@ legacy_pthreadpool_t legacy_pthreadpool_create(size_t threads_count) {
   std::mutex thread_pool_creation_mutex_;
   std::lock_guard<std::mutex> guard(thread_pool_creation_mutex_);
 
-  return reinterpret_cast<legacy_pthreadpool_t>(new caffe2::ThreadPool(threads_count));
+  return reinterpret_cast<legacy_pthreadpool_t>(caffe2::ThreadPool::createThreadPool(threads_count));
 }
 
 void legacy_pthreadpool_destroy(legacy_pthreadpool_t pthreadpool) {
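Because ThreadPool now has pure virtual methods, call sites can no longer construct it directly, which is why legacy_pthreadpool_create switches to the factory. A hedged illustration, not code from this commit (makePool is a hypothetical helper):

#include "caffe2/utils/threadpool/ThreadPool.h"

caffe2::ThreadPool* makePool(int threads_count) {
  // Direct construction no longer compiles now that ThreadPool is abstract:
  //   return new caffe2::ThreadPool(threads_count);  // error: abstract class
  // The factory returns the concrete ThreadPoolImpl behind the interface.
  return caffe2::ThreadPool::createThreadPool(threads_count);
}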