Revert "Increase C10_COMPILE_TIME_MAX_GPUS to 128 (#144138)"

This reverts commit 6cfc08167595e27ee9a5701c6426a7a8a7e387ef.

Reverted https://github.com/pytorch/pytorch/pull/144138 on behalf of https://github.com/albanD due to This seems to impact the caffe2 code ([comment](https://github.com/pytorch/pytorch/pull/144138#issuecomment-2590891200))
This commit is contained in:
PyTorch MergeBot
2025-01-14 19:04:12 +00:00
parent b4b4e57469
commit bdd942efd7
4 changed files with 9 additions and 24 deletions

View File

@@ -132,14 +132,7 @@ Device::Device(const std::string& device_string) : Device(Type::CPU) {
try {
if (!device_index_str.empty()) {
auto index = std::stoi(device_index_str);
TORCH_CHECK(
index <=
static_cast<int>(std::numeric_limits<c10::DeviceIndex>::max()),
"Device index '",
device_index_str,
"' is out of range");
index_ = static_cast<c10::DeviceIndex>(index);
index_ = static_cast<c10::DeviceIndex>(std::stoi(device_index_str));
}
} catch (const std::exception&) {
TORCH_CHECK(

View File

@@ -169,7 +169,7 @@ struct C10_API Device final {
private:
DeviceType type_;
DeviceIndex index_ = -1;
void validate() const {
void validate() {
// Removing these checks in release builds noticeably improves
// performance in micro-benchmarks.
// This is safe to do, because backends that use the DeviceIndex

View File

@@ -1,6 +1,4 @@
#pragma once
#include <cstdint>
#include <limits>
#ifndef C10_USING_CUSTOM_GENERATED_MACROS
@@ -49,6 +47,5 @@ o */
// fbcode depends on this value being 16
#define C10_COMPILE_TIME_MAX_GPUS 16
#else
constexpr std::int64_t C10_COMPILE_TIME_MAX_GPUS =
std::numeric_limits<int8_t>::max() + 1;
#define C10_COMPILE_TIME_MAX_GPUS 120
#endif

View File

@@ -9,7 +9,6 @@
#include <array>
#include <atomic>
#include <cstdint>
#include <limits>
namespace c10::cuda {
@@ -175,16 +174,12 @@ static void initGlobalStreamState() {
num_gpus = device_count();
// Check if the number of GPUs matches the expected compile-time max number
// of GPUs.
if constexpr (
C10_COMPILE_TIME_MAX_GPUS <
std::numeric_limits<decltype(num_gpus)>::max()) {
TORCH_CHECK(
num_gpus <= C10_COMPILE_TIME_MAX_GPUS,
"Number of CUDA devices on the machine is larger than the compiled "
"max number of gpus expected (",
C10_COMPILE_TIME_MAX_GPUS,
"). Increase that and recompile.");
}
TORCH_CHECK(
num_gpus <= C10_COMPILE_TIME_MAX_GPUS,
"Number of CUDA devices on the machine is larger than the compiled "
"max number of gpus expected (",
C10_COMPILE_TIME_MAX_GPUS,
"). Increase that and recompile.");
int leastPriority = -1, greatestPriority = -1;
C10_CUDA_CHECK(
cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority));