Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "[cutlass backend][BE][ez] Make matmul layouts be row x column (#156656)"
This reverts commit 84c588e5eada9e7921608065edc444a15c22cb1c. Reverted https://github.com/pytorch/pytorch/pull/156656 on behalf of https://github.com/henrylhtsang due to breaking fbcode A100 tests ([comment](https://github.com/pytorch/pytorch/pull/156656#issuecomment-3020769914))
This commit is contained in:
@@ -261,7 +261,7 @@ class TestCutlassBackend(TestCase):
 M, N, K = 4096, 2048, 25728

 a = torch.randn(M, K).cuda().half()
-b = torch.randn(N, K).cuda().half().t()
+b = torch.randn(K, N).cuda().half()

 with config.patch(
 {
@@ -289,7 +289,7 @@ class TestCutlassBackend(TestCase):
 M, N, K = 4096, 2048, 25728

 a = torch.randn(M, K).cuda().half()
-b = torch.randn(N, K).cuda().half().t()
+b = torch.randn(K, N).cuda().half()

 x_shapes = [
 (M, N),
@@ -326,7 +326,7 @@ class TestCutlassBackend(TestCase):
 B, M, N, K = 10, 4096, 2048, 25728

 a = torch.randn(B, M, K).cuda().half()
-b = torch.randn(B, N, K).cuda().half().permute(0, 2, 1)
+b = torch.randn(B, K, N).cuda().half()

 with config.patch(
 {
@@ -358,8 +358,8 @@ class TestCutlassBackend(TestCase):

 model = MyModel()
 a = torch.randn(128, 16).cuda().half()
-b = torch.randn(128, 16).cuda().half().t()
-c = torch.randn(512, 16).cuda().half().t()
+b = torch.randn(16, 128).cuda().half()
+c = torch.randn(16, 512).cuda().half()

 with config.patch(
 {
@@ -400,8 +400,8 @@ class TestCutlassBackend(TestCase):

 model = MyModel()
 a = torch.randn(128, 16).cuda().half()
-b = torch.randn(128, 16).cuda().half().t()
-c = torch.randn(512, 16).cuda().half().t()
+b = torch.randn(16, 128).cuda().half()
+c = torch.randn(16, 512).cuda().half()

 with config.patch(
 {
@@ -465,7 +465,7 @@ class TestCutlassBackend(TestCase):
 model = MyModel().cuda()

 inputs = [
-(torch.randn(M, K).cuda().to(dtype), torch.randn(N, K).cuda().to(dtype).t())
+(torch.randn(M, K).cuda().to(dtype), torch.randn(K, N).cuda().to(dtype))
 for (M, N, K) in shapes
 ]

@@ -633,7 +633,7 @@ class TestCutlassBackend(TestCase):
 (
 torch.randn(x_shape(M, N)).cuda().to(dtype),
 torch.randn(M, K).cuda().to(dtype),
-torch.randn(N, K).cuda().to(dtype).t(),
+torch.randn(K, N).cuda().to(dtype),
 )
 for (M, N, K) in shapes
 ]
@@ -744,7 +744,7 @@ class TestCutlassBackend(TestCase):
 return a @ b

 a = torch.randn(128, 16).cuda().half()
-b = torch.randn(128, 16).cuda().half().t()
+b = torch.randn(16, 128).cuda().half()

 with config.patch(
 {
@@ -770,7 +770,7 @@ class TestCutlassBackend(TestCase):
 ),
 ):
 a = torch.randn(M, K).cuda().half()
-b = torch.randn(N, K).cuda().half().t()
+b = torch.randn(K, N).cuda().half()
 Y_compiled = torch.compile(mm, dynamic=dynamic)(a, b)
 Y = mm(a, b)
 # we need relaxed numerical limits due to the sheer size of the
@@ -935,7 +935,7 @@ class TestCutlassBackend(TestCase):
 }

 x = torch.randn(M, K).cuda().half()
-w = torch.randn(N, K).cuda().half().t()
+w = torch.randn(K, N).cuda().half()

 actual = AOTIRunnerUtil.run(
 model,
@@ -973,7 +973,7 @@ class TestCutlassBackend(TestCase):
 }

 x = torch.randn(M, K).cuda().half()
-w = torch.randn(N, K).cuda().half().t()
+w = torch.randn(K, N).cuda().half()

 actual = AOTIRunnerUtil.run(
 model,
@@ -1003,7 +1003,7 @@ class TestCutlassBackend(TestCase):
 M, N, K = 200, 5216, 10_432

 x = torch.randn(M, K).cuda().half()
-w = torch.randn(N, K).cuda().half().t()
+w = torch.randn(K, N).cuda().half()

 actual = AOTIRunnerUtil.run(
 model,
@@ -1032,7 +1032,7 @@ class TestCutlassBackend(TestCase):
 mask = torch.tensor([0, 0, 1, 1]).tile(m, k // 4).cuda().half()
 a = torch.rand(m, k).cuda().half() * mask
 a_sparse = to_sparse_semi_structured(a)
-b = torch.rand(n, k).cuda().half().t()
+b = torch.rand(k, n).cuda().half()

 with config.patch(
 {
@@ -1335,7 +1335,7 @@ class TestCutlassBackend(TestCase):

 M, N, K = (128, 128, 16)
 A = torch.randn(M, K).cuda().half()
-B = torch.randn(N, K).cuda().half().t()
+B = torch.randn(K, N).cuda().half()

 def select_no_algorithm(*args, **kwargs):
 raise NoValidChoicesError
Reference in New Issue
Block a user