[XROS][ML] System specific adjustments for UTs to work. (#65245)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/65245

Building and running c10 and qnnpack tests on XROS.

Notable changes:
- Adding #if defined(__XROS__) in a few places not supported by XROS
- Changing Threadpool to abstract class
ghstack-source-id: 139513579

Test Plan: Run c10 and qnnpack tests on XROS.

Reviewed By: veselinp, iseeyuan

Differential Revision: D30137333

fbshipit-source-id: bb6239b935187fac712834341fe5a8d3377762b1
This commit is contained in:
Karol Kosik
2021-10-01 18:13:39 -07:00
committed by Facebook GitHub Bot
parent 363ccb257d
commit eb3b9fe719
9 changed files with 62 additions and 38 deletions

View File

@ -43,7 +43,7 @@ namespace detail {
* Note this is a legacy method (from THRandom.cpp)
* FIXME: use std::random_device with entropy information
*/
#ifndef _WIN32
#if !defined(_WIN32) && !defined(__XROS__)
static uint64_t readURandomLong() {
int randDev = open("/dev/urandom", O_RDONLY);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@ -56,7 +56,7 @@ static uint64_t readURandomLong() {
close(randDev);
return randValue;
}
#endif // _WIN32
#endif // _WIN32 && __XROS__
/**
* Gets a non deterministic random number number from either the
@ -82,6 +82,9 @@ uint64_t getNonDeterministicRandom(bool is_cuda) {
s = (uint64_t)std::chrono::high_resolution_clock::now()
.time_since_epoch()
.count();
#elif defined(__XROS__)
std::random_device rd;
s = ((((uint64_t)rd()) << 32) + rd()) & 0x1FFFFFFFFFFFFF;
#elif defined(__SGX_ENABLED__)
TORCH_CHECK(
sgx_read_rand(reinterpret_cast<uint8_t*>(&s), sizeof(s)) == SGX_SUCCESS,

View File

@ -314,7 +314,7 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
// CUDA_KERNEL_ASSERT checks the assertion
// even when NDEBUG is defined. This is useful for important assertions in CUDA
// code that would otherwise be suppressed when building Release.
#if defined(__ANDROID__) || defined(__APPLE__) || \
#if defined(__ANDROID__) || defined(__APPLE__) || defined(__XROS__) || \
(defined(USE_ROCM) && ROCM_VERSION < 40100)
// Those platforms do not support assert()
#define CUDA_KERNEL_ASSERT(cond)

View File

@ -209,7 +209,9 @@ bool InitCaffeLogging(int* argc, char** argv) {
::google::InitGoogleLogging(argv[0]);
#if !defined(_MSC_VER)
// This is never defined on Windows
#if !defined(__XROS__)
::google::InstallFailureSignalHandler();
#endif
#endif
}
UpdateLoggingLevelsFromFlags();

View File

@ -4,7 +4,8 @@
#include <functional>
#include <memory>
#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__)
#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__) || \
defined(__XROS__)
#define HAS_DEMANGLE 0
#elif defined(__APPLE__) && \
(TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE)

View File

@ -20,6 +20,26 @@ C10_DEFINE_int(pthreadpool_size, 0, "Override the default thread pool size.");
namespace caffe2 {
namespace {
class ThreadPoolImpl : public ThreadPool {
public:
explicit ThreadPoolImpl(int numThreads);
~ThreadPoolImpl() override;
// Returns the number of threads currently in use
int getNumThreads() const override;
void setNumThreads(size_t numThreads) override;
void run(const std::function<void(int, size_t)>& fn, size_t range) override;
void withPool(const std::function<void(WorkersPool*)>& f) override;
private:
std::atomic_size_t numThreads_;
std::shared_ptr<WorkersPool> workersPool_;
std::vector<std::shared_ptr<Task>> tasks_;
};
}
size_t getDefaultNumThreads() {
CAFFE_ENFORCE(cpuinfo_initialize(), "cpuinfo initialization failed");
int numThreads = cpuinfo_get_processors_count();
@ -89,43 +109,40 @@ constexpr size_t kDefaultMinWorkSize = 1;
size_t ThreadPool::defaultNumThreads_ = 0;
ThreadPool* ThreadPool::createThreadPool(int numThreads) {
return new ThreadPoolImpl(numThreads);
}
std::unique_ptr<ThreadPool> ThreadPool::defaultThreadPool() {
defaultNumThreads_ = getDefaultNumThreads();
LOG(INFO) << "Constructing thread pool with " << defaultNumThreads_
<< " threads";
return std::make_unique<ThreadPool>(defaultNumThreads_);
return std::make_unique<ThreadPoolImpl>(defaultNumThreads_);
}
ThreadPool::ThreadPool(int numThreads)
: minWorkSize_(kDefaultMinWorkSize),
numThreads_(numThreads),
workersPool_(std::make_shared<WorkersPool>()) {}
ThreadPoolImpl::ThreadPoolImpl(int numThreads)
: numThreads_(numThreads),
workersPool_(std::make_shared<WorkersPool>()) {
minWorkSize_ = kDefaultMinWorkSize;
}
// NOLINTNEXTLINE(modernize-use-equals-default)
ThreadPool::~ThreadPool() {}
ThreadPoolImpl::~ThreadPoolImpl() {}
int ThreadPool::getNumThreads() const {
int ThreadPoolImpl::getNumThreads() const {
return numThreads_;
}
// Sets the number of threads
// # of threads should not be bigger than the number of big cores
void ThreadPool::setNumThreads(size_t numThreads) {
void ThreadPoolImpl::setNumThreads(size_t numThreads) {
if (defaultNumThreads_ == 0) {
defaultNumThreads_ = getDefaultNumThreads();
}
numThreads_ = std::min(numThreads, defaultNumThreads_);
}
// Sets the minimum work size (range) for which to invoke the
// threadpool; work sizes smaller than this will just be run on the
// main (calling) thread
void ThreadPool::setMinWorkSize(size_t size) {
std::lock_guard<std::mutex> guard(executionMutex_);
minWorkSize_ = size;
}
void ThreadPool::run(const std::function<void(int, size_t)>& fn, size_t range) {
void ThreadPoolImpl::run(const std::function<void(int, size_t)>& fn, size_t range) {
const auto numThreads = numThreads_.load(std::memory_order_relaxed);
std::lock_guard<std::mutex> guard(executionMutex_);
@ -183,7 +200,7 @@ void ThreadPool::run(const std::function<void(int, size_t)>& fn, size_t range) {
workersPool_->Execute(tasks_);
}
void ThreadPool::withPool(const std::function<void(WorkersPool*)>& f) {
void ThreadPoolImpl::withPool(const std::function<void(WorkersPool*)>& f) {
std::lock_guard<std::mutex> guard(executionMutex_);
f(workersPool_.get());
}

View File

@ -32,33 +32,34 @@ constexpr size_t kCacheLineSize = 64;
// TORCH_API and alignas annotations at the same time.
class TORCH_API /*alignas(kCacheLineSize)*/ ThreadPool {
public:
static ThreadPool* createThreadPool(int numThreads);
static std::unique_ptr<ThreadPool> defaultThreadPool();
ThreadPool(int numThreads);
~ThreadPool();
virtual ~ThreadPool() = default;
// Returns the number of threads currently in use
int getNumThreads() const;
void setNumThreads(size_t numThreads);
virtual int getNumThreads() const = 0;
virtual void setNumThreads(size_t numThreads) = 0;
// Sets the minimum work size (range) for which to invoke the
// threadpool; work sizes smaller than this will just be run on the
// main (calling) thread
void setMinWorkSize(size_t size);
void setMinWorkSize(size_t size) {
std::lock_guard<std::mutex> guard(executionMutex_);
minWorkSize_ = size;
}
size_t getMinWorkSize() const {
return minWorkSize_;
}
void run(const std::function<void(int, size_t)>& fn, size_t range);
virtual void run(const std::function<void(int, size_t)>& fn, size_t range) = 0;
// Run an arbitrary function in a thread-safe manner accessing the Workers
// Pool
void withPool(const std::function<void(WorkersPool*)>& fn);
virtual void withPool(const std::function<void(WorkersPool*)>& fn) = 0;
private:
protected:
static size_t defaultNumThreads_;
mutable std::mutex executionMutex_;
size_t minWorkSize_;
std::atomic_size_t numThreads_;
std::shared_ptr<WorkersPool> workersPool_;
std::vector<std::shared_ptr<Task>> tasks_;
};
} // namespace caffe2

View File

@ -83,7 +83,7 @@ size_t getDefaultNumThreads();
PThreadPool* pthreadpool() {
static auto threadpool =
std::make_unique<PThreadPool>(getDefaultNumThreads());
#ifndef WIN32
#if !(defined(WIN32)) && !(defined(__XROS__))
static std::once_flag flag;
std::call_once(flag, []() {
pthread_atfork(nullptr, nullptr, child_atfork);

View File

@ -8,7 +8,7 @@
#include <stddef.h> // for size_t
#include <stdint.h> // for uint32_t
#ifdef USE_PTHREADPOOL
#if defined(USE_PTHREADPOOL) && !(defined(__XROS__))
// This is a hack.
// Mainly introduced here because
// 1. NNPACK can be compiled to use internal legacy threadpool implementation because much of C2 depends on that.

View File

@ -2,7 +2,7 @@
#include "caffe2/utils/threadpool/pthreadpool-cpp.h"
#include "caffe2/utils/threadpool/ThreadPool.h"
#ifdef USE_PTHREADPOOL
#if defined(USE_PTHREADPOOL) && !(defined(__XROS__))
namespace caffe2 {
namespace {
static thread_local bool using_new_threadpool{false};
@ -34,7 +34,7 @@ void legacy_pthreadpool_compute_1d(
}
return;
}
#ifdef USE_PTHREADPOOL
#if defined(USE_PTHREADPOOL) && !(defined(__XROS__))
if (caffe2::using_new_threadpool) {
pthreadpool_parallelize_1d(threadpool, function, argument, range, 0u);
} else {
@ -76,7 +76,7 @@ legacy_pthreadpool_t legacy_pthreadpool_create(size_t threads_count) {
std::mutex thread_pool_creation_mutex_;
std::lock_guard<std::mutex> guard(thread_pool_creation_mutex_);
return reinterpret_cast<legacy_pthreadpool_t>(new caffe2::ThreadPool(threads_count));
return reinterpret_cast<legacy_pthreadpool_t>(caffe2::ThreadPool::createThreadPool(threads_count));
}
void legacy_pthreadpool_destroy(legacy_pthreadpool_t pthreadpool) {