Revert "[cutlass backend][BE][ez] Make matmul layouts be row x column (#156656)"

This reverts commit 84c588e5eada9e7921608065edc444a15c22cb1c.

Reverted https://github.com/pytorch/pytorch/pull/156656 on behalf of https://github.com/henrylhtsang due to breaking fbcode A100 tests ([comment](https://github.com/pytorch/pytorch/pull/156656#issuecomment-3020769914))
Author: PyTorch MergeBot
Date:   2025-06-30 21:16:04 +00:00
Parent: 3684be056d
Commit: d3efd73234

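For context on what changes below: #156656 built the second GEMM operand as a transposed row-major tensor (a column-major layout), and this revert restores the directly allocated row-major version. A minimal sketch of the difference (requires a CUDA device; the sizes and the `b_col`/`b_row` names are illustrative, not taken from the test suite):

```python
import torch

# Illustrative sizes, matching one of the reverted tests.
M, N, K = 128, 128, 16

a = torch.randn(M, K).cuda().half()

# Layout from #156656 (removed by this revert): a (K, N) view of a
# row-major (N, K) tensor, i.e. a column-major second operand.
b_col = torch.randn(N, K).cuda().half().t()

# Layout restored by this revert: a contiguous, row-major (K, N) tensor.
b_row = torch.randn(K, N).cuda().half()

# Same shape, different strides: the CUTLASS backend sees a row x column
# GEMM in the first case and a row x row GEMM in the second.
assert b_col.shape == b_row.shape == (K, N)
assert b_col.stride() == (1, K)  # column-major
assert b_row.stride() == (N, 1)  # row-major
```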

@@ -261,7 +261,7 @@ class TestCutlassBackend(TestCase):
         M, N, K = 4096, 2048, 25728
         a = torch.randn(M, K).cuda().half()
-        b = torch.randn(N, K).cuda().half().t()
+        b = torch.randn(K, N).cuda().half()
         with config.patch(
             {
@@ -289,7 +289,7 @@ class TestCutlassBackend(TestCase):
         M, N, K = 4096, 2048, 25728
         a = torch.randn(M, K).cuda().half()
-        b = torch.randn(N, K).cuda().half().t()
+        b = torch.randn(K, N).cuda().half()
         x_shapes = [
             (M, N),
@@ -326,7 +326,7 @@ class TestCutlassBackend(TestCase):
         B, M, N, K = 10, 4096, 2048, 25728
         a = torch.randn(B, M, K).cuda().half()
-        b = torch.randn(B, N, K).cuda().half().permute(0, 2, 1)
+        b = torch.randn(B, K, N).cuda().half()
         with config.patch(
             {
@@ -358,8 +358,8 @@ class TestCutlassBackend(TestCase):
         model = MyModel()
         a = torch.randn(128, 16).cuda().half()
-        b = torch.randn(128, 16).cuda().half().t()
-        c = torch.randn(512, 16).cuda().half().t()
+        b = torch.randn(16, 128).cuda().half()
+        c = torch.randn(16, 512).cuda().half()
         with config.patch(
             {
@@ -400,8 +400,8 @@ class TestCutlassBackend(TestCase):
         model = MyModel()
         a = torch.randn(128, 16).cuda().half()
-        b = torch.randn(128, 16).cuda().half().t()
-        c = torch.randn(512, 16).cuda().half().t()
+        b = torch.randn(16, 128).cuda().half()
+        c = torch.randn(16, 512).cuda().half()
         with config.patch(
             {
@@ -465,7 +465,7 @@ class TestCutlassBackend(TestCase):
         model = MyModel().cuda()
         inputs = [
-            (torch.randn(M, K).cuda().to(dtype), torch.randn(N, K).cuda().to(dtype).t())
+            (torch.randn(M, K).cuda().to(dtype), torch.randn(K, N).cuda().to(dtype))
             for (M, N, K) in shapes
         ]
@@ -633,7 +633,7 @@ class TestCutlassBackend(TestCase):
             (
                 torch.randn(x_shape(M, N)).cuda().to(dtype),
                 torch.randn(M, K).cuda().to(dtype),
-                torch.randn(N, K).cuda().to(dtype).t(),
+                torch.randn(K, N).cuda().to(dtype),
             )
             for (M, N, K) in shapes
         ]
@@ -744,7 +744,7 @@ class TestCutlassBackend(TestCase):
             return a @ b

         a = torch.randn(128, 16).cuda().half()
-        b = torch.randn(128, 16).cuda().half().t()
+        b = torch.randn(16, 128).cuda().half()
         with config.patch(
             {
@@ -770,7 +770,7 @@ class TestCutlassBackend(TestCase):
             ),
         ):
             a = torch.randn(M, K).cuda().half()
-            b = torch.randn(N, K).cuda().half().t()
+            b = torch.randn(K, N).cuda().half()
             Y_compiled = torch.compile(mm, dynamic=dynamic)(a, b)
             Y = mm(a, b)
             # we need relaxed numerical limits due to the sheer size of the
@@ -935,7 +935,7 @@ class TestCutlassBackend(TestCase):
         }
         x = torch.randn(M, K).cuda().half()
-        w = torch.randn(N, K).cuda().half().t()
+        w = torch.randn(K, N).cuda().half()
         actual = AOTIRunnerUtil.run(
             model,
@@ -973,7 +973,7 @@ class TestCutlassBackend(TestCase):
         }
         x = torch.randn(M, K).cuda().half()
-        w = torch.randn(N, K).cuda().half().t()
+        w = torch.randn(K, N).cuda().half()
         actual = AOTIRunnerUtil.run(
             model,
@@ -1003,7 +1003,7 @@ class TestCutlassBackend(TestCase):
         M, N, K = 200, 5216, 10_432
         x = torch.randn(M, K).cuda().half()
-        w = torch.randn(N, K).cuda().half().t()
+        w = torch.randn(K, N).cuda().half()
         actual = AOTIRunnerUtil.run(
             model,
@@ -1032,7 +1032,7 @@ class TestCutlassBackend(TestCase):
         mask = torch.tensor([0, 0, 1, 1]).tile(m, k // 4).cuda().half()
         a = torch.rand(m, k).cuda().half() * mask
         a_sparse = to_sparse_semi_structured(a)
-        b = torch.rand(n, k).cuda().half().t()
+        b = torch.rand(k, n).cuda().half()
         with config.patch(
             {
@@ -1335,7 +1335,7 @@ class TestCutlassBackend(TestCase):
         M, N, K = (128, 128, 16)
         A = torch.randn(M, K).cuda().half()
-        B = torch.randn(N, K).cuda().half().t()
+        B = torch.randn(K, N).cuda().half()

         def select_no_algorithm(*args, **kwargs):
             raise NoValidChoicesError
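The batched hunk above (@@ -326) makes the same change per batch: `permute(0, 2, 1)` on a row-major (B, N, K) tensor yields column-major matrices within each batch, while the restored code allocates a contiguous (B, K, N) tensor. A small sketch under the same caveats (CUDA device; illustrative sizes and names, the reverted test itself uses B, M, N, K = 10, 4096, 2048, 25728):

```python
import torch

B, K, N = 2, 4, 8  # small illustrative sizes

# Batched column layout from #156656 (removed): a (B, K, N) view of a
# row-major (B, N, K) tensor.
b_col = torch.randn(B, N, K).cuda().half().permute(0, 2, 1)

# Batched row layout restored by this revert: contiguous (B, K, N).
b_row = torch.randn(B, K, N).cuda().half()

assert b_col.shape == b_row.shape == (B, K, N)
assert b_col.stride()[1:] == (1, K)  # per-batch column-major
assert b_row.stride()[1:] == (N, 1)  # per-batch row-major
```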