Update Cutlass to v3.1 (#94188)
Now that we are on CUDA 11+ exclusively, we can update Nvidia's Cutlass to the next version.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/94188
Approved by: https://github.com/ezyang, https://github.com/jansel, https://github.com/malfet
Committed by: PyTorch MergeBot
Parent: 15e1bee269
Commit: dfba65be8b
@@ -49,14 +49,14 @@ __device__ __forceinline__ void fastSpecializedAtomicAdd(

   if (low_byte && index < (numel - 1)) {
     __half2 value2;
-    value2.x = value;
+    value2.x = static_cast<__half>(value);
     value2.y = __int2half_rz(0);
     atomicAdd(reinterpret_cast<__half2*>(target_addr), value2);

   } else if (!low_byte && index > 0) {
     __half2 value2;
     value2.x = __int2half_rz(0);
-    value2.y = value;
+    value2.y = static_cast<__half>(value);
     atomicAdd(reinterpret_cast<__half2*>(target_addr - 1), value2);

   } else {
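The hunk above only adds explicit static_cast<__half> conversions when packing the value into a __half2. For context, the snippet below is a minimal, self-contained sketch of the same packed-__half2 atomic-add technique; the function name, parameter types, and the alignment assumption are illustrative and not taken from the PyTorch source.

#include <cuda_fp16.h>

// Sketch of the packing trick used by fastSpecializedAtomicAdd: widen one
// 16-bit half into a __half2 whose other lane is zero, then do a single
// 32-bit atomicAdd on the aligned __half2 that contains the target element.
// Assumes `base` is __half2-aligned; `packed_half_atomic_add` is a made-up name.
template <typename scalar_t>
__device__ void packed_half_atomic_add(__half* base, long index, long numel, scalar_t value) {
  __half* target_addr = base + index;
  bool low_byte = (index % 2 == 0);  // low or high lane of the enclosing __half2

  if (low_byte && index < (numel - 1)) {
    __half2 value2;
    value2.x = static_cast<__half>(value);  // explicit cast, as in the diff above
    value2.y = __int2half_rz(0);
    atomicAdd(reinterpret_cast<__half2*>(target_addr), value2);
  } else if (!low_byte && index > 0) {
    __half2 value2;
    value2.x = __int2half_rz(0);
    value2.y = static_cast<__half>(value);
    atomicAdd(reinterpret_cast<__half2*>(target_addr - 1), value2);
  } else {
    // Boundary element: plain __half atomicAdd (available on sm_70+).
    atomicAdd(target_addr, static_cast<__half>(value));
  }
}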
@@ -21,7 +21,7 @@ __device__ void test(){

   __half a = __float2half(3.0f);
   __half b = __float2half(2.0f);
-  __half c = a - Half(b);
+  __half c = Half(a) - Half(b);
   assert(static_cast<Half>(c) == Half(1.0));

   // asserting if the functions used on
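Both operands are now wrapped in Half before the subtraction, so the arithmetic resolves through Half's own operators rather than relying on an implicit __half conversion on only one side. Below is a minimal sketch of the resulting pattern, assuming Half is c10::Half (exposed by ATen as at::Half) and that the file is compiled with nvcc; the function name is hypothetical.

#include <cassert>
#include <cuda_fp16.h>
#include <c10/util/Half.h>

using c10::Half;

// Hypothetical device-side check mirroring the updated test: wrap both __half
// operands in Half, subtract, and compare the result as Half.
__device__ void half_arithmetic_sketch() {
  __half a = __float2half(3.0f);
  __half b = __float2half(2.0f);
  __half c = Half(a) - Half(b);             // both operands wrapped, as in the diff
  assert(static_cast<Half>(c) == Half(1.0f));
}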
third_party/cutlass (vendored submodule)

Submodule third_party/cutlass updated: b72cbf957d...43cfbe0086
third_party/cutlass.BUILD (vendored)
@@ -5,7 +5,7 @@ load("@rules_cc//cc:defs.bzl", "cc_library")

 cc_library(
     name = "cutlass",
-    hdrs = glob(["include/**/*.h"]),
+    hdrs = glob(["include/**/*.h", "include/**/*.hpp"]),
     includes = ["include/"],
     visibility = ["//visibility:public"],
 )
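CUTLASS 3.x adds CuTe, whose public headers use the .hpp extension (for example cute/layout.hpp), so the Bazel target now has to export *.hpp alongside *.h. The host-only snippet below is a hedged sketch of a consumer that would fail to find its headers under the old glob; the chosen layout values are arbitrary.

// Hypothetical consumer of the vendored CUTLASS 3.x headers; compiles on the
// host with C++17 and only needs the .hpp headers matched by the new glob.
#include <cute/layout.hpp>

int main() {
  // A 2x4 row-major CuTe layout: shape (2, 4), strides (4, 1).
  auto layout = cute::make_layout(cute::make_shape(2, 4), cute::make_stride(4, 1));
  return cute::size(layout) == 8 ? 0 : 1;  // total number of elements
}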