mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-20 21:14:14 +08:00 
			
		
		
		
	[tests] Reduce sizes of unnecessarily large tensors to reduce OOM flakes (#158456)
Downsizes several tensors that were massively oversized to test the problem at hand, to reduce test flaking. Fixes #126867 Pull Request resolved: https://github.com/pytorch/pytorch/pull/158456 Approved by: https://github.com/desertfire
This commit is contained in:
		
				
					committed by
					
						 PyTorch MergeBot
						PyTorch MergeBot
					
				
			
			
				
	
			
			
			
						parent
						
							6100ed457c
						
					
				
				
					commit
					cab96b5879
				
			| @ -815,9 +815,9 @@ class TestMaxAutotune(TestCase): | ||||
|         Check https://github.com/pytorch/pytorch/issues/125437 for more details. | ||||
|         """ | ||||
|         x = rand_strided( | ||||
|             (50257, 32768), (1, 50304), dtype=torch.bfloat16, device=GPU_TYPE | ||||
|             (50257, 2048), (1, 50304), dtype=torch.bfloat16, device=GPU_TYPE | ||||
|         ) | ||||
|         y = rand_strided((32768, 768), (768, 1), dtype=torch.bfloat16, device=GPU_TYPE) | ||||
|         y = rand_strided((2048, 768), (768, 1), dtype=torch.bfloat16, device=GPU_TYPE) | ||||
|  | ||||
|         @torch.compile(mode="max-autotune") | ||||
|         def f(x, y): | ||||
| @ -830,9 +830,9 @@ class TestMaxAutotune(TestCase): | ||||
|     def test_non_contiguous_input_addmm(self): | ||||
|         b = torch.randn((768), dtype=torch.bfloat16, device=GPU_TYPE) | ||||
|         x = rand_strided( | ||||
|             (50257, 32768), (1, 50304), dtype=torch.bfloat16, device=GPU_TYPE | ||||
|             (50257, 2048), (1, 50304), dtype=torch.bfloat16, device=GPU_TYPE | ||||
|         ) | ||||
|         y = rand_strided((32768, 768), (768, 1), dtype=torch.bfloat16, device=GPU_TYPE) | ||||
|         y = rand_strided((2048, 768), (768, 1), dtype=torch.bfloat16, device=GPU_TYPE) | ||||
|  | ||||
|         @torch.compile(mode="max-autotune") | ||||
|         def f(x, y): | ||||
| @ -844,10 +844,10 @@ class TestMaxAutotune(TestCase): | ||||
|  | ||||
|     def test_non_contiguous_input_bmm(self): | ||||
|         x = rand_strided( | ||||
|             (1, 50257, 32768), (0, 1, 50304), dtype=torch.bfloat16, device=GPU_TYPE | ||||
|             (1, 50257, 2048), (0, 1, 50304), dtype=torch.bfloat16, device=GPU_TYPE | ||||
|         ) | ||||
|         y = rand_strided( | ||||
|             (1, 32768, 768), (0, 768, 1), dtype=torch.bfloat16, device=GPU_TYPE | ||||
|             (1, 2048, 768), (0, 768, 1), dtype=torch.bfloat16, device=GPU_TYPE | ||||
|         ) | ||||
|  | ||||
|         @torch.compile(mode="max-autotune") | ||||
| @ -861,16 +861,12 @@ class TestMaxAutotune(TestCase): | ||||
|     # TODO: fix accuracy failure of the triton template on XPU. | ||||
|     # and enable this test case. | ||||
|     @skipIfXpu | ||||
|     @unittest.skipIf( | ||||
|         os.getenv("TORCHINDUCTOR_CPP_WRAPPER", "0") == "1", | ||||
|         "OOM when running with TORCHINDUCTOR_CPP_WRAPPER https://github.com/pytorch/pytorch/issues/126867", | ||||
|     ) | ||||
|     def test_non_contiguous_input_mm_plus_mm(self): | ||||
|         x1 = rand_strided((50257, 32768), (1, 50304), device=GPU_TYPE) | ||||
|         y1 = rand_strided((32768, 768), (768, 1), device=GPU_TYPE) | ||||
|         x1 = rand_strided((50257, 2048), (1, 50304), device=GPU_TYPE) | ||||
|         y1 = rand_strided((2048, 768), (768, 1), device=GPU_TYPE) | ||||
|  | ||||
|         x2 = rand_strided((50257, 32768), (1, 50304), device=GPU_TYPE) | ||||
|         y2 = rand_strided((32768, 768), (768, 1), device=GPU_TYPE) | ||||
|         x2 = rand_strided((50257, 2048), (1, 50304), device=GPU_TYPE) | ||||
|         y2 = rand_strided((2048, 768), (768, 1), device=GPU_TYPE) | ||||
|  | ||||
|         @torch.compile(mode="max-autotune") | ||||
|         def f(x1, y1, x2, y2): | ||||
|  | ||||
		Reference in New Issue
	
	Block a user