Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Revert "Update Cutlass to v2.11 (#94188)"
This reverts commit a0f9abdcb651bb948d2d6e9f7d3ce947e2c53659. Reverted https://github.com/pytorch/pytorch/pull/94188 on behalf of https://github.com/ezyang due to bouncing this to derisk branch cut
@@ -414,6 +414,7 @@ cc_library(
 torch_cuda_half_options = [
     "-DCUDA_HAS_FP16=1",
     "-D__CUDA_NO_HALF_OPERATORS__",
+    "-D__CUDA_NO_HALF_CONVERSIONS__",
     "-D__CUDA_NO_BFLOAT16_CONVERSIONS__",
     "-D__CUDA_NO_HALF2_OPERATORS__",
 ]

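For context: the -D__CUDA_NO_HALF_CONVERSIONS__ define being restored above disables __half's implicit float conversions when <cuda_fp16.h> is included (the neighbouring flags do the same for the half operators, __half2 operators, and bfloat16 conversions). A minimal sketch of the effect, assuming a standalone .cu file; the kernel name is illustrative and not part of this commit:

// Hedged sketch: effect of __CUDA_NO_HALF_CONVERSIONS__, defined here in-source
// to mirror the -D flag above. Only the explicit conversion intrinsics remain.
#define __CUDA_NO_HALF_CONVERSIONS__
#include <cuda_fp16.h>

__global__ void half_conversion_demo(const float* in, float* out) {
  float f = in[0];
  // __half h = f;             // would not compile: implicit float -> __half is disabled
  __half h = __float2half(f);  // explicit conversion intrinsic still works
  // float g = h;              // implicit __half -> float is likewise disabled
  float g = __half2float(h);   // explicit conversion back to float
  out[0] = g;
}

With the define back in place, the implicit-conversion overloads that made the explicit static_cast<__half> and Half(...) wrappers necessary are gone again, which is consistent with those wrappers being dropped in the source hunks below.
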
@@ -49,14 +49,14 @@ __device__ __forceinline__ void fastSpecializedAtomicAdd(

   if (low_byte && index < (numel - 1)) {
     __half2 value2;
-    value2.x = static_cast<__half>(value);
+    value2.x = value;
     value2.y = __int2half_rz(0);
     atomicAdd(reinterpret_cast<__half2*>(target_addr), value2);

   } else if (!low_byte && index > 0) {
     __half2 value2;
     value2.x = __int2half_rz(0);
-    value2.y = static_cast<__half>(value);
+    value2.y = value;
     atomicAdd(reinterpret_cast<__half2*>(target_addr - 1), value2);

   } else {
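The hunk above is the __half2 packing path of fastSpecializedAtomicAdd: to add a 2-byte half at an arbitrary element, the value is widened into a 4-byte-aligned __half2 whose other lane is zero, and a single atomicAdd on the __half2 updates only the target lane. A simplified, standalone restatement of that idea, assuming the element is not at the very start or end of the tensor (the real function handles those cases in the else branch); the function name is illustrative:

// Hedged sketch of the __half2 packing trick above; requires an architecture
// with __half2 atomicAdd (compute capability 6.0 or newer).
#include <cuda_fp16.h>

__device__ void packed_half_atomic_add(__half* target_addr, __half value, bool low_byte) {
  __half2 value2;
  if (low_byte) {
    // target_addr is the low lane of an aligned __half2: add (value, 0).
    value2.x = value;
    value2.y = __int2half_rz(0);
    atomicAdd(reinterpret_cast<__half2*>(target_addr), value2);
  } else {
    // target_addr is the high lane: step back one __half and add (0, value).
    value2.x = __int2half_rz(0);
    value2.y = value;
    atomicAdd(reinterpret_cast<__half2*>(target_addr - 1), value2);
  }
}
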
@@ -21,7 +21,7 @@ __device__ void test(){

   __half a = __float2half(3.0f);
   __half b = __float2half(2.0f);
-  __half c = Half(a) - Half(b);
+  __half c = a - Half(b);
   assert(static_cast<Half>(c) == Half(1.0));

   // asserting if the functions used on

@@ -1653,6 +1653,7 @@ if(NOT INTERN_BUILD_MOBILE)
     message(STATUS "Found CUDA with FP16 support, compiling with torch.cuda.HalfTensor")
     string(APPEND CMAKE_CUDA_FLAGS " -DCUDA_HAS_FP16=1"
       " -D__CUDA_NO_HALF_OPERATORS__"
+      " -D__CUDA_NO_HALF_CONVERSIONS__"
       " -D__CUDA_NO_HALF2_OPERATORS__"
       " -D__CUDA_NO_BFLOAT16_CONVERSIONS__")

Submodule third_party/cutlass updated: 66d9cddc83...b72cbf957d
@@ -225,6 +225,7 @@ MSVC_IGNORE_CUDAFE_WARNINGS = [

 COMMON_NVCC_FLAGS = [
     '-D__CUDA_NO_HALF_OPERATORS__',
+    '-D__CUDA_NO_HALF_CONVERSIONS__',
     '-D__CUDA_NO_BFLOAT16_CONVERSIONS__',
     '-D__CUDA_NO_HALF2_OPERATORS__',
     '--expt-relaxed-constexpr'