Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Revert "Update Cutlass to v2.11 (#94188)"
This reverts commit a0f9abdcb651bb948d2d6e9f7d3ce947e2c53659. Reverted https://github.com/pytorch/pytorch/pull/94188 on behalf of https://github.com/ezyang due to bouncing this to derisk branch cut
@@ -414,6 +414,7 @@ cc_library(
 torch_cuda_half_options = [
     "-DCUDA_HAS_FP16=1",
     "-D__CUDA_NO_HALF_OPERATORS__",
+    "-D__CUDA_NO_HALF_CONVERSIONS__",
     "-D__CUDA_NO_BFLOAT16_CONVERSIONS__",
     "-D__CUDA_NO_HALF2_OPERATORS__",
 ]

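For context: the -D__CUDA_NO_HALF_CONVERSIONS__ define being restored above disables __half's implicit float conversions when <cuda_fp16.h> is included (the neighbouring flags do the same for the half operators, __half2 operators, and bfloat16 conversions). A minimal sketch of the effect, assuming a standalone .cu file; the kernel name is illustrative and not part of this commit:

// Hedged sketch: effect of __CUDA_NO_HALF_CONVERSIONS__, defined here in-source
// to mirror the -D flag above. Only the explicit conversion intrinsics remain.
#define __CUDA_NO_HALF_CONVERSIONS__
#include <cuda_fp16.h>

__global__ void half_conversion_demo(const float* in, float* out) {
  float f = in[0];
  // __half h = f;             // would not compile: implicit float -> __half is disabled
  __half h = __float2half(f);  // explicit conversion intrinsic still works
  // float g = h;              // implicit __half -> float is likewise disabled
  float g = __half2float(h);   // explicit conversion back to float
  out[0] = g;
}

With the define back in place, the implicit-conversion overloads that made the explicit static_cast<__half> and Half(...) wrappers necessary are gone again, which is consistent with those wrappers being dropped in the source hunks below.
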
@@ -49,14 +49,14 @@ __device__ __forceinline__ void fastSpecializedAtomicAdd(

   if (low_byte && index < (numel - 1)) {
     __half2 value2;
-    value2.x = static_cast<__half>(value);
+    value2.x = value;
     value2.y = __int2half_rz(0);
     atomicAdd(reinterpret_cast<__half2*>(target_addr), value2);

   } else if (!low_byte && index > 0) {
     __half2 value2;
     value2.x = __int2half_rz(0);
-    value2.y = static_cast<__half>(value);
+    value2.y = value;
     atomicAdd(reinterpret_cast<__half2*>(target_addr - 1), value2);

   } else {
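The hunk above is the __half2 packing path of fastSpecializedAtomicAdd: to add a 2-byte half at an arbitrary element, the value is widened into a 4-byte-aligned __half2 whose other lane is zero, and a single atomicAdd on the __half2 updates only the target lane. A simplified, standalone restatement of that idea, assuming the element is not at the very start or end of the tensor (the real function handles those cases in the else branch); the function name is illustrative:

// Hedged sketch of the __half2 packing trick above; requires an architecture
// with __half2 atomicAdd (compute capability 6.0 or newer).
#include <cuda_fp16.h>

__device__ void packed_half_atomic_add(__half* target_addr, __half value, bool low_byte) {
  __half2 value2;
  if (low_byte) {
    // target_addr is the low lane of an aligned __half2: add (value, 0).
    value2.x = value;
    value2.y = __int2half_rz(0);
    atomicAdd(reinterpret_cast<__half2*>(target_addr), value2);
  } else {
    // target_addr is the high lane: step back one __half and add (0, value).
    value2.x = __int2half_rz(0);
    value2.y = value;
    atomicAdd(reinterpret_cast<__half2*>(target_addr - 1), value2);
  }
}
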
@@ -21,7 +21,7 @@ __device__ void test(){

   __half a = __float2half(3.0f);
   __half b = __float2half(2.0f);
-  __half c = Half(a) - Half(b);
+  __half c = a - Half(b);
   assert(static_cast<Half>(c) == Half(1.0));

   // asserting if the functions used on

@@ -1653,6 +1653,7 @@ if(NOT INTERN_BUILD_MOBILE)
     message(STATUS "Found CUDA with FP16 support, compiling with torch.cuda.HalfTensor")
     string(APPEND CMAKE_CUDA_FLAGS " -DCUDA_HAS_FP16=1"
       " -D__CUDA_NO_HALF_OPERATORS__"
+      " -D__CUDA_NO_HALF_CONVERSIONS__"
       " -D__CUDA_NO_HALF2_OPERATORS__"
       " -D__CUDA_NO_BFLOAT16_CONVERSIONS__")

Submodule third_party/cutlass updated: 66d9cddc83...b72cbf957d
@@ -225,6 +225,7 @@ MSVC_IGNORE_CUDAFE_WARNINGS = [

 COMMON_NVCC_FLAGS = [
     '-D__CUDA_NO_HALF_OPERATORS__',
+    '-D__CUDA_NO_HALF_CONVERSIONS__',
     '-D__CUDA_NO_BFLOAT16_CONVERSIONS__',
     '-D__CUDA_NO_HALF2_OPERATORS__',
     '--expt-relaxed-constexpr'