// Patch overview. Only this comment preamble is added; the diff text below is unchanged.
//
// Purpose: add `__XROS__` platform guards in c10 and caffe2, and turn
// caffe2::ThreadPool into an abstract interface backed by a file-local
// ThreadPoolImpl.
//
// Per-file summary:
//  - c10/core/GeneratorImpl.cpp: readURandomLong() is now compiled only when
//    neither `_WIN32` nor `__XROS__` is defined. getNonDeterministicRandom()
//    gains an `__XROS__` branch that combines two std::random_device draws
//    (first draw shifted left by 32, second added) and masks the sum with
//    0x1FFFFFFFFFFFFF.
//  - c10/macros/Macros.h: `__XROS__` is added to the condition guarding the
//    "Those platforms do not support assert()" definition of
//    CUDA_KERNEL_ASSERT(cond).
//  - c10/util/Logging.cpp: ::google::InstallFailureSignalHandler() is skipped
//    when `__XROS__` is defined.
//  - c10/util/Type.cpp: `__XROS__` is added to the platforms that set
//    HAS_DEMANGLE to 0.
//  - caffe2/utils/threadpool/ThreadPool.h: ThreadPool gets a defaulted virtual
//    destructor; getNumThreads/setNumThreads/run/withPool become pure virtual.
//    The public constructor is removed in favour of the static factory
//    createThreadPool(int). setMinWorkSize() moves inline and getMinWorkSize()
//    is added. The remaining data members change from private to protected,
//    and numThreads_/workersPool_/tasks_ are removed from the base.
//  - caffe2/utils/threadpool/ThreadPool.cc: ThreadPoolImpl (anonymous
//    namespace) now owns numThreads_/workersPool_/tasks_ and implements the
//    virtual methods. createThreadPool() returns `new ThreadPoolImpl(numThreads)`.
//    The ThreadPoolImpl constructor assigns minWorkSize_ in its body.
//  - caffe2/utils/threadpool/pthreadpool-cpp.cc: the pthread_atfork
//    registration is compiled out when `__XROS__` is defined.
//  - caffe2/utils/threadpool/pthreadpool.h and pthreadpool_impl.cc: the
//    USE_PTHREADPOOL sections additionally require `__XROS__` to be undefined.
//    legacy_pthreadpool_create() now calls ThreadPool::createThreadPool()
//    instead of `new caffe2::ThreadPool(...)`.
//
// NOTE(review): the new trailer `#endif // _WIN32 && __XROS__` does not mirror
//   its condition `!defined(_WIN32) && !defined(__XROS__)`; consider
//   `// !_WIN32 && !__XROS__`.
// NOTE(review): several template argument lists and #include targets appear to
//   be missing (e.g. `std::make_unique(defaultNumThreads_)` is textually the
//   same on the `-` and `+` lines, `reinterpret_cast(&s)`, `#include #include`).
//   Presumably an extraction artifact - verify against the upstream commit.
// NOTE(review): the hunks are flattened onto single physical lines, so this
//   text presumably needs its line breaks restored before `git apply` accepts it.
// NOTE(review): createThreadPool() returns a raw owning pointer. The matching
//   release in legacy_pthreadpool_destroy() is not visible here - confirm it
//   deletes through ThreadPool* (the base now declares a virtual destructor).
// NOTE(review): getMinWorkSize() reads minWorkSize_ without taking
//   executionMutex_, while setMinWorkSize() writes it under the lock -
//   confirm this is intended.
// NOTE(review): in the legacy_pthreadpool_create() context lines,
//   thread_pool_creation_mutex_ is declared as a local without `static` in the
//   visible text, so each call would lock its own mutex - confirm upstream.
diff --git a/c10/core/GeneratorImpl.cpp b/c10/core/GeneratorImpl.cpp index 720b1b27a68a..4022b150d084 100644 --- a/c10/core/GeneratorImpl.cpp +++ b/c10/core/GeneratorImpl.cpp @@ -43,7 +43,7 @@ namespace detail { * Note this is a legacy method (from THRandom.cpp) * FIXME: use std::random_device with entropy information */ -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(__XROS__) static uint64_t readURandomLong() { int randDev = open("/dev/urandom", O_RDONLY); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) @@ -56,7 +56,7 @@ static uint64_t readURandomLong() { close(randDev); return randValue; } -#endif // _WIN32 +#endif // _WIN32 && __XROS__ /** * Gets a non deterministic random number number from either the @@ -82,6 +82,9 @@ uint64_t getNonDeterministicRandom(bool is_cuda) { s = (uint64_t)std::chrono::high_resolution_clock::now() .time_since_epoch() .count(); +#elif defined(__XROS__) + std::random_device rd; + s = ((((uint64_t)rd()) << 32) + rd()) & 0x1FFFFFFFFFFFFF; #elif defined(__SGX_ENABLED__) TORCH_CHECK( sgx_read_rand(reinterpret_cast(&s), sizeof(s)) == SGX_SUCCESS, diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h index 123d7a4717d1..59e15227488d 100644 --- a/c10/macros/Macros.h +++ b/c10/macros/Macros.h @@ -314,7 +314,7 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; // CUDA_KERNEL_ASSERT checks the assertion // even when NDEBUG is defined. This is useful for important assertions in CUDA // code that would otherwise be suppressed when building Release. 
-#if defined(__ANDROID__) || defined(__APPLE__) || \ +#if defined(__ANDROID__) || defined(__APPLE__) || defined(__XROS__) || \ (defined(USE_ROCM) && ROCM_VERSION < 40100) // Those platforms do not support assert() #define CUDA_KERNEL_ASSERT(cond) diff --git a/c10/util/Logging.cpp b/c10/util/Logging.cpp index bf7bd9852d2b..c1ede582ff2a 100644 --- a/c10/util/Logging.cpp +++ b/c10/util/Logging.cpp @@ -209,7 +209,9 @@ bool InitCaffeLogging(int* argc, char** argv) { ::google::InitGoogleLogging(argv[0]); #if !defined(_MSC_VER) // This is never defined on Windows +#if !defined(__XROS__) ::google::InstallFailureSignalHandler(); +#endif #endif } UpdateLoggingLevelsFromFlags(); diff --git a/c10/util/Type.cpp b/c10/util/Type.cpp index 8366683a5015..e0b2c4c3171d 100644 --- a/c10/util/Type.cpp +++ b/c10/util/Type.cpp @@ -4,7 +4,8 @@ #include #include -#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__) +#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__) || \ + defined(__XROS__) #define HAS_DEMANGLE 0 #elif defined(__APPLE__) && \ (TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE) diff --git a/caffe2/utils/threadpool/ThreadPool.cc b/caffe2/utils/threadpool/ThreadPool.cc index f71304715803..3f0a2adc233c 100644 --- a/caffe2/utils/threadpool/ThreadPool.cc +++ b/caffe2/utils/threadpool/ThreadPool.cc @@ -20,6 +20,26 @@ C10_DEFINE_int(pthreadpool_size, 0, "Override the default thread pool size."); namespace caffe2 { +namespace { + class ThreadPoolImpl : public ThreadPool { + public: + explicit ThreadPoolImpl(int numThreads); + ~ThreadPoolImpl() override; + + // Returns the number of threads currently in use + int getNumThreads() const override; + void setNumThreads(size_t numThreads) override; + + void run(const std::function& fn, size_t range) override; + void withPool(const std::function& f) override; + + private: + std::atomic_size_t numThreads_; + std::shared_ptr workersPool_; + std::vector> tasks_; + }; +} + size_t 
getDefaultNumThreads() { CAFFE_ENFORCE(cpuinfo_initialize(), "cpuinfo initialization failed"); int numThreads = cpuinfo_get_processors_count(); @@ -89,43 +109,40 @@ constexpr size_t kDefaultMinWorkSize = 1; size_t ThreadPool::defaultNumThreads_ = 0; +ThreadPool* ThreadPool::createThreadPool(int numThreads) { + return new ThreadPoolImpl(numThreads); +} + std::unique_ptr ThreadPool::defaultThreadPool() { defaultNumThreads_ = getDefaultNumThreads(); LOG(INFO) << "Constructing thread pool with " << defaultNumThreads_ << " threads"; - return std::make_unique(defaultNumThreads_); + return std::make_unique(defaultNumThreads_); } -ThreadPool::ThreadPool(int numThreads) - : minWorkSize_(kDefaultMinWorkSize), - numThreads_(numThreads), - workersPool_(std::make_shared()) {} +ThreadPoolImpl::ThreadPoolImpl(int numThreads) + : numThreads_(numThreads), + workersPool_(std::make_shared()) { + minWorkSize_ = kDefaultMinWorkSize; +} // NOLINTNEXTLINE(modernize-use-equals-default) -ThreadPool::~ThreadPool() {} +ThreadPoolImpl::~ThreadPoolImpl() {} -int ThreadPool::getNumThreads() const { +int ThreadPoolImpl::getNumThreads() const { return numThreads_; } // Sets the number of threads // # of threads should not be bigger than the number of big cores -void ThreadPool::setNumThreads(size_t numThreads) { +void ThreadPoolImpl::setNumThreads(size_t numThreads) { if (defaultNumThreads_ == 0) { defaultNumThreads_ = getDefaultNumThreads(); } numThreads_ = std::min(numThreads, defaultNumThreads_); } -// Sets the minimum work size (range) for which to invoke the -// threadpool; work sizes smaller than this will just be run on the -// main (calling) thread -void ThreadPool::setMinWorkSize(size_t size) { - std::lock_guard guard(executionMutex_); - minWorkSize_ = size; -} - -void ThreadPool::run(const std::function& fn, size_t range) { +void ThreadPoolImpl::run(const std::function& fn, size_t range) { const auto numThreads = numThreads_.load(std::memory_order_relaxed); std::lock_guard 
guard(executionMutex_); @@ -183,7 +200,7 @@ void ThreadPool::run(const std::function& fn, size_t range) { workersPool_->Execute(tasks_); } -void ThreadPool::withPool(const std::function& f) { +void ThreadPoolImpl::withPool(const std::function& f) { std::lock_guard guard(executionMutex_); f(workersPool_.get()); } diff --git a/caffe2/utils/threadpool/ThreadPool.h b/caffe2/utils/threadpool/ThreadPool.h index 951b8f7f6bef..af21b6c14c95 100644 --- a/caffe2/utils/threadpool/ThreadPool.h +++ b/caffe2/utils/threadpool/ThreadPool.h @@ -32,33 +32,34 @@ constexpr size_t kCacheLineSize = 64; // TORCH_API and alignas annotations at the same time. class TORCH_API /*alignas(kCacheLineSize)*/ ThreadPool { public: + static ThreadPool* createThreadPool(int numThreads); static std::unique_ptr defaultThreadPool(); - ThreadPool(int numThreads); - ~ThreadPool(); + virtual ~ThreadPool() = default; // Returns the number of threads currently in use - int getNumThreads() const; - void setNumThreads(size_t numThreads); + virtual int getNumThreads() const = 0; + virtual void setNumThreads(size_t numThreads) = 0; // Sets the minimum work size (range) for which to invoke the // threadpool; work sizes smaller than this will just be run on the // main (calling) thread - void setMinWorkSize(size_t size); + void setMinWorkSize(size_t size) { + std::lock_guard guard(executionMutex_); + minWorkSize_ = size; + } + size_t getMinWorkSize() const { return minWorkSize_; } - void run(const std::function& fn, size_t range); + virtual void run(const std::function& fn, size_t range) = 0; // Run an arbitrary function in a thread-safe manner accessing the Workers // Pool - void withPool(const std::function& fn); + virtual void withPool(const std::function& fn) = 0; - private: + protected: static size_t defaultNumThreads_; mutable std::mutex executionMutex_; size_t minWorkSize_; - std::atomic_size_t numThreads_; - std::shared_ptr workersPool_; - std::vector> tasks_; }; } // namespace caffe2 diff --git 
a/caffe2/utils/threadpool/pthreadpool-cpp.cc b/caffe2/utils/threadpool/pthreadpool-cpp.cc index 0440e62e55f9..38846d5b143d 100644 --- a/caffe2/utils/threadpool/pthreadpool-cpp.cc +++ b/caffe2/utils/threadpool/pthreadpool-cpp.cc @@ -83,7 +83,7 @@ size_t getDefaultNumThreads(); PThreadPool* pthreadpool() { static auto threadpool = std::make_unique(getDefaultNumThreads()); -#ifndef WIN32 +#if !(defined(WIN32)) && !(defined(__XROS__)) static std::once_flag flag; std::call_once(flag, []() { pthread_atfork(nullptr, nullptr, child_atfork); diff --git a/caffe2/utils/threadpool/pthreadpool.h b/caffe2/utils/threadpool/pthreadpool.h index 0c6cc3661e05..54b3cb63303c 100644 --- a/caffe2/utils/threadpool/pthreadpool.h +++ b/caffe2/utils/threadpool/pthreadpool.h @@ -8,7 +8,7 @@ #include // for size_t #include // for uint32_t -#ifdef USE_PTHREADPOOL +#if defined(USE_PTHREADPOOL) && !(defined(__XROS__)) // This is a hack. // Mainly introduced here because // 1. NNPACK can be compiled to use internal legacy threadpool implementation because much of C2 depends on that. 
diff --git a/caffe2/utils/threadpool/pthreadpool_impl.cc b/caffe2/utils/threadpool/pthreadpool_impl.cc index 8165ae3571ca..72bee75678ec 100644 --- a/caffe2/utils/threadpool/pthreadpool_impl.cc +++ b/caffe2/utils/threadpool/pthreadpool_impl.cc @@ -2,7 +2,7 @@ #include "caffe2/utils/threadpool/pthreadpool-cpp.h" #include "caffe2/utils/threadpool/ThreadPool.h" -#ifdef USE_PTHREADPOOL +#if defined(USE_PTHREADPOOL) && !(defined(__XROS__)) namespace caffe2 { namespace { static thread_local bool using_new_threadpool{false}; @@ -34,7 +34,7 @@ void legacy_pthreadpool_compute_1d( } return; } -#ifdef USE_PTHREADPOOL +#if defined(USE_PTHREADPOOL) && !(defined(__XROS__)) if (caffe2::using_new_threadpool) { pthreadpool_parallelize_1d(threadpool, function, argument, range, 0u); } else { @@ -76,7 +76,7 @@ legacy_pthreadpool_t legacy_pthreadpool_create(size_t threads_count) { std::mutex thread_pool_creation_mutex_; std::lock_guard guard(thread_pool_creation_mutex_); - return reinterpret_cast(new caffe2::ThreadPool(threads_count)); + return reinterpret_cast(caffe2::ThreadPool::createThreadPool(threads_count)); } void legacy_pthreadpool_destroy(legacy_pthreadpool_t pthreadpool) {