Update Cutlass to v3.1 (#94188)

Now that we support CUDA 11+ exclusively, we can update NVIDIA's CUTLASS library to the next version (v3.1).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/94188
Approved by: https://github.com/ezyang, https://github.com/jansel, https://github.com/malfet
This commit is contained in:
Aaron Gokaslan
2023-04-25 22:02:42 +00:00
committed by PyTorch MergeBot
parent 15e1bee269
commit dfba65be8b
4 changed files with 5 additions and 5 deletions

View File

@@ -49,14 +49,14 @@ __device__ __forceinline__ void fastSpecializedAtomicAdd(
if (low_byte && index < (numel - 1)) {
__half2 value2;
-value2.x = value;
+value2.x = static_cast<__half>(value);
value2.y = __int2half_rz(0);
atomicAdd(reinterpret_cast<__half2*>(target_addr), value2);
} else if (!low_byte && index > 0) {
__half2 value2;
value2.x = __int2half_rz(0);
-value2.y = value;
+value2.y = static_cast<__half>(value);
atomicAdd(reinterpret_cast<__half2*>(target_addr - 1), value2);
} else {

View File

@@ -21,7 +21,7 @@ __device__ void test(){
__half a = __float2half(3.0f);
__half b = __float2half(2.0f);
-__half c = a - Half(b);
+__half c = Half(a) - Half(b);
assert(static_cast<Half>(c) == Half(1.0));
// asserting if the functions used on

View File

@@ -5,7 +5,7 @@ load("@rules_cc//cc:defs.bzl", "cc_library")
cc_library(
name = "cutlass",
-hdrs = glob(["include/**/*.h"]),
+hdrs = glob(["include/**/*.h", "include/**/*.hpp"]),
includes = ["include/"],
visibility = ["//visibility:public"],
)