Check all CUDA API calls for errors in torch/ (#81560)

Summary:
Original commit changeset: 0bb770d2cdb2

Original Phabricator Diff: D35194935 (79e5b053b6)

Differential Revision: D35291874

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81560
Approved by: https://github.com/ezyang
Author:       Richard Barnes
Date:         2022-10-28 00:40:47 +00:00
Committed by: PyTorch MergeBot
Parent:       4e3a0ff92e
Commit:       3ece9fb45d

6 changed files with 22 additions and 21 deletions

@@ -195,7 +195,7 @@ CudaIPCSentData::~CudaIPCSentData() {
   try {
     if (event_sync_required_) {
       at::cuda::CUDAGuard device_guard(device_.index());
-      cudaEventDestroy(event_);
+      C10_CUDA_CHECK(cudaEventDestroy(event_));
       if (!CudaIPCGlobalEntities::alive) {
         return;
       }
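
Note on the macro: C10_CUDA_CHECK (declared in c10/cuda/CUDAException.h) turns an otherwise ignored cudaError_t into a thrown error that carries the failing call's source location. A minimal sketch of that pattern follows, using a hypothetical CHECK_CUDA macro rather than PyTorch's real definition, which reports through c10::Error:

// Illustrative only: a simplified stand-in for what C10_CUDA_CHECK does.
#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

#define CHECK_CUDA(expr)                                                    \
  do {                                                                      \
    cudaError_t err_ = (expr);                                              \
    if (err_ != cudaSuccess) {                                              \
      throw std::runtime_error(std::string("CUDA error at ") + __FILE__ +   \
                               ":" + std::to_string(__LINE__) + ": " +      \
                               cudaGetErrorString(err_));                   \
    }                                                                       \
  } while (0)

// Usage, mirroring the change above:
//   CHECK_CUDA(cudaEventDestroy(event_));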

@@ -3,6 +3,7 @@
 #include <torch/csrc/cuda/nccl.h>

 #include <ATen/ATen.h>
+#include <c10/cuda/CUDAException.h>
 #include <c10/cuda/CUDAGuard.h>
 #include <c10/util/Exception.h>
 #include <c10/util/hash.h>
@@ -142,7 +143,7 @@ struct NcclCommList {
     if (comms) {
       for (const auto i : c10::irange(ndevices)) {
         int dummy_var;
-        if (cudaGetDevice(&dummy_var) != cudaSuccess) {
+        if (C10_CUDA_ERROR_HANDLED(cudaGetDevice(&dummy_var)) != cudaSuccess) {
           /* there are cases when this destructor is called after the
            CUDA driver is already unloaded from the process.
            In these cases, skip ncclCommDestroy */
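
C10_CUDA_ERROR_HANDLED, by contrast, passes the cudaError_t through so the surrounding code can branch on it (or, in the cudart bindings below, return it to the caller), presumably so the call still counts as explicitly handled under the "check all CUDA API calls" rule this change enforces. A small sketch of the manual-handling style, with a hypothetical helper name and no PyTorch macros:

#include <cuda_runtime.h>

// Hypothetical teardown helper: during process exit the CUDA driver may
// already be unloaded, so a failing cudaGetDevice() means "skip GPU cleanup"
// rather than "throw from a destructor".
inline bool cuda_still_usable() {
  int device = -1;
  return cudaGetDevice(&device) == cudaSuccess;
}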

@@ -71,25 +71,26 @@ void initCudartBindings(PyObject* module) {
       "cuda"
       "HostRegister",
       [](uintptr_t ptr, size_t size, unsigned int flags) -> cudaError_t {
-        return cudaHostRegister((void*)ptr, size, flags);
+        return C10_CUDA_ERROR_HANDLED(
+            cudaHostRegister((void*)ptr, size, flags));
       });
   cudart.def(
       "cuda"
       "HostUnregister",
       [](uintptr_t ptr) -> cudaError_t {
-        return cudaHostUnregister((void*)ptr);
+        return C10_CUDA_ERROR_HANDLED(cudaHostUnregister((void*)ptr));
       });
   cudart.def(
       "cuda"
       "StreamCreate",
       [](uintptr_t ptr) -> cudaError_t {
-        return cudaStreamCreate((cudaStream_t*)ptr);
+        return C10_CUDA_ERROR_HANDLED(cudaStreamCreate((cudaStream_t*)ptr));
       });
   cudart.def(
       "cuda"
       "StreamDestroy",
       [](uintptr_t ptr) -> cudaError_t {
-        return cudaStreamDestroy((cudaStream_t)ptr);
+        return C10_CUDA_ERROR_HANDLED(cudaStreamDestroy((cudaStream_t)ptr));
       });
 #if !defined(USE_ROCM)
   cudart.def(
@@ -104,7 +105,7 @@ void initCudartBindings(PyObject* module) {
         c10::cuda::CUDAGuard guard(device);
         size_t device_free = 0;
         size_t device_total = 0;
-        cudaMemGetInfo(&device_free, &device_total);
+        C10_CUDA_CHECK(cudaMemGetInfo(&device_free, &device_total));
         return {device_free, device_total};
       });
 }

@@ -1175,9 +1175,9 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
   if (measure_kernel_time_ ||
       isDebugDumpEnabled(DebugDumpOption::EffectiveBandwidth) ||
       isDebugDumpEnabled(DebugDumpOption::PerfDebugVerbose)) {
-    cudaEventCreate(&start_event);
-    cudaEventCreate(&finish_event);
-    cudaEventRecord(start_event);
+    C10_CUDA_CHECK(cudaEventCreate(&start_event));
+    C10_CUDA_CHECK(cudaEventCreate(&finish_event));
+    C10_CUDA_CHECK(cudaEventRecord(start_event));
   }

   if (execute_kernel_) {
@@ -1233,12 +1233,13 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
   if (measure_kernel_time_ ||
       isDebugDumpEnabled(DebugDumpOption::EffectiveBandwidth) ||
       isDebugDumpEnabled(DebugDumpOption::PerfDebugVerbose)) {
-    cudaEventRecord(finish_event);
-    cudaEventSynchronize(start_event);
-    cudaEventSynchronize(finish_event);
-    cudaEventElapsedTime(&kernel_time_ms_, start_event, finish_event);
-    cudaEventDestroy(start_event);
-    cudaEventDestroy(finish_event);
+    C10_CUDA_CHECK(cudaEventRecord(finish_event));
+    C10_CUDA_CHECK(cudaEventSynchronize(start_event));
+    C10_CUDA_CHECK(cudaEventSynchronize(finish_event));
+    C10_CUDA_CHECK(
+        cudaEventElapsedTime(&kernel_time_ms_, start_event, finish_event));
+    C10_CUDA_CHECK(cudaEventDestroy(start_event));
+    C10_CUDA_CHECK(cudaEventDestroy(finish_event));

     bytes_processed_ = 0;
     // Figure how many bytes are inputs, outputs, and temporary buffers
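
The timing path in runFusion keeps its cudaEvent-based measurement and now checks every runtime call. For reference, the idiom in isolation, as a sketch reusing the hypothetical CHECK_CUDA macro from above (the kernel launch is elided):

#include <cuda_runtime.h>

// Measure GPU time elapsed between two events on the default stream.
inline float time_gpu_work_ms() {
  cudaEvent_t start = nullptr;
  cudaEvent_t finish = nullptr;
  CHECK_CUDA(cudaEventCreate(&start));
  CHECK_CUDA(cudaEventCreate(&finish));

  CHECK_CUDA(cudaEventRecord(start));
  // ... enqueue kernels here ...
  CHECK_CUDA(cudaEventRecord(finish));

  // The elapsed time is only defined once both events have completed.
  CHECK_CUDA(cudaEventSynchronize(finish));
  float ms = 0.0f;
  CHECK_CUDA(cudaEventElapsedTime(&ms, start, finish));

  CHECK_CUDA(cudaEventDestroy(start));
  CHECK_CUDA(cudaEventDestroy(finish));
  return ms;
}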

@@ -941,7 +941,7 @@ void initializeCudaContext() {
   if (!pctx) {
     std::unique_lock<std::mutex> cudaFreeMutexLock(
         *(c10::cuda::getFreeMutex()));
-    cudaFree(nullptr);
+    C10_CUDA_CHECK(cudaFree(nullptr));
   }
 }
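
Background on the wrapped line: cudaFree(nullptr) frees nothing and is used here only to force the CUDA runtime to initialize when no current context is reported, so the added check surfaces failures of that initialization instead of ignoring them. The idiom in isolation, as a sketch with a hypothetical function name:

#include <cuda_runtime.h>

// Force lazy CUDA runtime initialization for the current device.
// cudaFree with a null pointer performs no deallocation, so the only
// observable effect is context initialization (or an error if it fails).
inline void ensure_cuda_runtime_initialized() {
  CHECK_CUDA(cudaFree(nullptr));  // CHECK_CUDA: hypothetical macro sketched earlier
}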

@@ -81,16 +81,14 @@ struct CUDAMethods : public ProfilerStubs {

   void onEachDevice(std::function<void(int)> op) const override {
     at::cuda::OptionalCUDAGuard device_guard;
-    // NOLINTNEXTLINE(bugprone-signed-char-misuse)
-    int count = at::cuda::device_count();
-    for (const auto i : c10::irange(count)) {
+    for (const auto i : c10::irange(at::cuda::device_count())) {
       device_guard.set_index(i);
       op(i);
     }
   }

   void synchronize() const override {
-    cudaDeviceSynchronize();
+    TORCH_CUDA_CHECK(cudaDeviceSynchronize());
   }

   bool enabled() const override {