mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Revert "Increase C10_COMPILE_TIME_MAX_GPUS to 128 (#144138)"
This reverts commit 6cfc08167595e27ee9a5701c6426a7a8a7e387ef. Reverted https://github.com/pytorch/pytorch/pull/144138 on behalf of https://github.com/albanD due to This seems to impact the caffe2 code ([comment](https://github.com/pytorch/pytorch/pull/144138#issuecomment-2590891200))
This commit is contained in:
@@ -132,14 +132,7 @@ Device::Device(const std::string& device_string) : Device(Type::CPU) {
   try {
     if (!device_index_str.empty()) {
-      auto index = std::stoi(device_index_str);
-      TORCH_CHECK(
-          index <=
-              static_cast<int>(std::numeric_limits<c10::DeviceIndex>::max()),
-          "Device index '",
-          device_index_str,
-          "' is out of range");
-      index_ = static_cast<c10::DeviceIndex>(index);
+      index_ = static_cast<c10::DeviceIndex>(std::stoi(device_index_str));
     }
   } catch (const std::exception&) {
     TORCH_CHECK(
@@ -169,7 +169,7 @@ struct C10_API Device final {
  private:
   DeviceType type_;
   DeviceIndex index_ = -1;
-  void validate() const {
+  void validate() {
     // Removing these checks in release builds noticeably improves
     // performance in micro-benchmarks.
     // This is safe to do, because backends that use the DeviceIndex
@@ -1,6 +1,4 @@
 #pragma once
-#include <cstdint>
-#include <limits>

 #ifndef C10_USING_CUSTOM_GENERATED_MACROS
@@ -49,6 +47,5 @@ o */
 // fbcode depends on this value being 16
 #define C10_COMPILE_TIME_MAX_GPUS 16
 #else
-constexpr std::int64_t C10_COMPILE_TIME_MAX_GPUS =
-    std::numeric_limits<int8_t>::max() + 1;
+#define C10_COMPILE_TIME_MAX_GPUS 120
 #endif
@@ -9,7 +9,6 @@
 #include <array>
 #include <atomic>
 #include <cstdint>
-#include <limits>

 namespace c10::cuda {
@@ -175,16 +174,12 @@ static void initGlobalStreamState() {
   num_gpus = device_count();
   // Check if the number of GPUs matches the expected compile-time max number
   // of GPUs.
-  if constexpr (
-      C10_COMPILE_TIME_MAX_GPUS <
-      std::numeric_limits<decltype(num_gpus)>::max()) {
-    TORCH_CHECK(
-        num_gpus <= C10_COMPILE_TIME_MAX_GPUS,
-        "Number of CUDA devices on the machine is larger than the compiled "
-        "max number of gpus expected (",
-        C10_COMPILE_TIME_MAX_GPUS,
-        "). Increase that and recompile.");
-  }
+  TORCH_CHECK(
+      num_gpus <= C10_COMPILE_TIME_MAX_GPUS,
+      "Number of CUDA devices on the machine is larger than the compiled "
+      "max number of gpus expected (",
+      C10_COMPILE_TIME_MAX_GPUS,
+      "). Increase that and recompile.");
   int leastPriority = -1, greatestPriority = -1;
   C10_CUDA_CHECK(
       cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority));
Reference in New Issue
Block a user