Files
pytorch/c10/core/impl/DeviceGuardImplInterface.cpp
Sherlock Huang f1eb99e2e4 [Reland] Return NoOpDeviceGuardImpl in replace of CudaDeviceGuard when device is not available, or cpu-only build (#163016)
Reland of #160532

Summary:

To support exporting a cuda model on a CPU-only machine under fake tensor mode.
User commonly need to move sample inputs to the cuda device with .to("cuda:0") or .to("cuda") call.
This diff supports this.
I expect the following pattern to work
```
with FakeTensorMode(allow_non_fake_inputs=True):
    cuda_module = module.to("cuda:0")
    cuda_sample_inputs = tuple([x.to("cuda:0") for x in sample_inputs])
    with torch.no_grad():
        ep = torch.export.export(cuda_module, cuda_sample_inputs)
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/163016
Approved by: https://github.com/huydhn
2025-09-17 05:01:33 +00:00

41 lines
1.3 KiB
C++

#include <c10/core/impl/DeviceGuardImplInterface.h>
#include <c10/core/impl/FakeGuardImpl.h>
#include <array>
namespace c10::impl {
std::array<
std::atomic<const DeviceGuardImplInterface*>,
static_cast<size_t>(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)>
device_guard_impl_registry;
DeviceGuardImplRegistrar::DeviceGuardImplRegistrar(
DeviceType type,
const DeviceGuardImplInterface* impl) {
device_guard_impl_registry[static_cast<size_t>(type)].store(impl);
}
namespace {
thread_local std::unique_ptr<DeviceGuardImplInterface> tls_fake_device_guard =
nullptr;
}
void ensureCUDADeviceGuardSet() {
constexpr auto cuda_idx = static_cast<std::size_t>(DeviceType::CUDA);
const DeviceGuardImplInterface* p =
device_guard_impl_registry[cuda_idx].load();
// A non-null `ptr` indicates that the CUDA guard is already set up,
// implying this is using cuda build
if (p && p->deviceCount() == 0) {
// In following cases, we override CUDA guard interface with a no-op
// device guard. When p->deviceCount() == 0, cuda build is enabled, but no
// cuda devices available.
tls_fake_device_guard = std::make_unique<FakeGuardImpl<DeviceType::CUDA>>();
device_guard_impl_registry[cuda_idx].store(tls_fake_device_guard.get());
}
}
} // namespace c10::impl