mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Fix CUDA_MAX_THREADS_PER_SM for sm_87 (#88644)
Fixes #88326. CC @ngimel @ptrblck. Pull Request resolved: https://github.com/pytorch/pytorch/pull/88644. Approved by: https://github.com/ngimel
This commit is contained in:
committed by
PyTorch MergeBot
parent
6bb7f4f29f
commit
3e30a9ea1c
@@ -255,13 +255,13 @@ using namespace c10::hip;
 // constants from
 // (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications)
 // The maximum number of threads per multiprocessor is 1024 for Turing
-// architecture (7.5), 1536 for Geforce Ampere (8.6), and 2048 for all other
-// architectures. You'll get warnings if you exceed these constants. Hence, the
-// following macros adjust the input values from the user to resolve potential
-// warnings.
+// architecture (7.5), 1536 for Geforce Ampere (8.6)/Jetson Orin (8.7), and
+// 2048 for all other architectures. You'll get warnings if you exceed these
+// constants. Hence, the following macros adjust the input values from the user
+// to resolve potential warnings.
 #if __CUDA_ARCH__ == 750
 constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1024;
-#elif __CUDA_ARCH__ == 860
+#elif __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870
 constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1536;
 #else
 constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 2048;
|
||||
Reference in New Issue
Block a user