diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp index 46d73bc897a5..d534ec5a1785 100644 --- a/aten/src/ATen/cuda/CUDABlas.cpp +++ b/aten/src/ATen/cuda/CUDABlas.cpp @@ -183,22 +183,13 @@ uint32_t _getAlignment(uintptr_t address) { static size_t _parseChosenWorkspaceSize() { const char * val = getenv("CUBLASLT_WORKSPACE_SIZE"); - size_t workspace_size = 1024; #ifdef USE_ROCM if (!val) { // accept either env var val = getenv("HIPBLASLT_WORKSPACE_SIZE"); } -#else - cudaDeviceProp* p = at::cuda::getDeviceProperties(c10::cuda::current_device()); - // Keep workspace_size = 1024 for small Ampere GPUs - // See https://github.com/pytorch/pytorch/pull/120925#issuecomment-1977556485 - if (p->major == 8 && p->totalGlobalMem / 1073741824 >= 24) { - workspace_size = 4096; - } else if (p->major >= 9) { - workspace_size = 32768; - } #endif + size_t workspace_size = 1024; /* default size in KiB according to #73328 */ if (val) { try { workspace_size = std::stoi(val);