mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-05 08:24:57 +08:00
[nvfuser] avoid out of bounds error (#89584)
Summary: update OOB check (https://github.com/csarofeen/pytorch/pull/2218) and skip tests that OOM on internal machines. Test Plan: ``` buck2 test mode/dev-nosan //caffe2/torch/csrc/jit/codegen/cuda/test:nvfuser ``` Differential Revision: D41502369 Pull Request resolved: https://github.com/pytorch/pytorch/pull/89584 Approved by: https://github.com/jjsjann123
This commit is contained in:
committed by
PyTorch MergeBot
parent
77df2ca9b6
commit
908daa8ae5
@ -7177,6 +7177,9 @@ TEST_F(NVFuserTest, FusionComputeAtExprOrder2_CUDA) {
|
||||
}
|
||||
|
||||
TEST_F(NVFuserTest, FusionComputeAtExprOrder3_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
Fusion fusion;
|
||||
FusionGuard fg(&fusion);
|
||||
|
||||
@ -9791,6 +9794,9 @@ TEST_F(NVFuserTest, FusionSmemDynamicReductionSymbolicArg_CUDA) {
|
||||
}
|
||||
|
||||
TEST_F(NVFuserTest, FusionSmemDynamicPwiseMulSymbolicArgWAR_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
Fusion fusion;
|
||||
FusionGuard fg(&fusion);
|
||||
|
||||
|
||||
@ -2704,6 +2704,9 @@ TEST_F(NVFuserTest, FusionWelfordOp_CUDA) {
|
||||
}
|
||||
|
||||
TEST_F(NVFuserTest, FusionBlockWelfordOp_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
Fusion fusion;
|
||||
FusionGuard fg(&fusion);
|
||||
|
||||
@ -6336,6 +6339,9 @@ TEST_F(NVFuserTest, FusionWelfordOuterPersistence_CUDA) {
|
||||
}
|
||||
|
||||
TEST_F(NVFuserTest, FusionSegmentIslands_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
auto fusion = std::make_unique<Fusion>();
|
||||
FusionGuard fg(fusion.get());
|
||||
|
||||
|
||||
@ -5945,6 +5945,9 @@ TEST_F(NVFuserTest, AsyncCompilation_CUDA) {
|
||||
}
|
||||
|
||||
TEST_F(NVFuserTest, FusionMergeBroadcastingTrivialReduction1_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
|
||||
auto fusion = fusion_ptr.get();
|
||||
FusionGuard fg(fusion);
|
||||
|
||||
@ -1561,6 +1561,9 @@ TEST_F(NVFuserTest, FusionGroupedReductionReEntrant1_CUDA) {
|
||||
// Channels-last batch norm with vectorization. Relies on re-entrant
|
||||
// GroupedGridReduction
|
||||
TEST_F(NVFuserTest, FusionGroupedReductionChannelsLastBatchNormLike_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
Fusion fusion;
|
||||
FusionGuard fg(&fusion);
|
||||
|
||||
|
||||
@ -167,6 +167,9 @@ TEST_F(NVFuserTest, FusionRNGManualScheduleValidateWithCURand_CUDA) {
|
||||
}
|
||||
|
||||
TEST_F(NVFuserTest, FusionRNGManualScheduleValidateWithCURand2_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "Fails accuracy on V100 32gb";
|
||||
#endif
|
||||
auto dtype = kFloat;
|
||||
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
|
||||
auto fusion = fusion_ptr.get();
|
||||
|
||||
@ -2621,6 +2621,9 @@ TEST_F(NVFuserTest, FusionGather4_CUDA) {
|
||||
}
|
||||
|
||||
TEST_F(NVFuserTest, FusionGather5_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
Fusion fusion;
|
||||
FusionGuard fg(&fusion);
|
||||
|
||||
|
||||
@ -2815,6 +2815,9 @@ TEST_F(NVFuserTest, FusionAmpereMatmulLargeLoad_CUDA) {
|
||||
|
||||
// Matmul test for Turing MMA: across supported layouts
|
||||
TEST_F(NVFuserTest, FusionTuringMatmulLargeLoad_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
// Keep multiples of 8 to keep vectorizable.
|
||||
int M = 504, N = 136, K = 248;
|
||||
|
||||
|
||||
@ -335,6 +335,9 @@ TEST_F(NVFuserTest, FusionScheduleTransposeMultipleOutput_CUDA) {
|
||||
* t1
|
||||
*/
|
||||
TEST_F(NVFuserTest, FusionScheduleTransposeMultipleInputOutput_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
Fusion fusion;
|
||||
FusionGuard fg(&fusion);
|
||||
|
||||
@ -994,6 +997,9 @@ TEST_F(NVFuserTest, FusionScheduleTransposeSmallInnerSize3_CUDA) {
|
||||
|
||||
// x->sin->transpose->cos->y
|
||||
TEST_F(NVFuserTest, FusionScheduleTranspose2DSmallInnerSize_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "OOM on V100 32gb";
|
||||
#endif
|
||||
std::array<std::vector<int64_t>, 2> shapes{
|
||||
std::vector<int64_t>{1024 * 1024 * 128, 2},
|
||||
std::vector<int64_t>{2, 1024 * 1024 * 128}};
|
||||
|
||||
@ -1272,6 +1272,9 @@ TEST_F(NVFuserTest, FusionViewVectorize_CUDA) {
|
||||
}
|
||||
|
||||
TEST_F(NVFuserTest, FusionExpandFlatten_CUDA) {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
GTEST_SKIP() << "Fails accuracy on V100 32gb";
|
||||
#endif
|
||||
auto fusion = std::make_unique<Fusion>();
|
||||
FusionGuard fg(fusion.get());
|
||||
|
||||
|
||||
@ -509,7 +509,7 @@ class AnalyzeViewTransformation {
|
||||
"View is complete, but there's still some elements to distribute.");
|
||||
}
|
||||
|
||||
if ((new_view_index == new_view_.size() ||
|
||||
if ((new_view_index + 1 >= new_view_.size() ||
|
||||
(new_view_[new_view_index + 1] != 1)) &&
|
||||
original_view_index + 1 < original_view_.size() &&
|
||||
original_view_[original_view_index + 1] == 1 &&
|
||||
|
||||
Reference in New Issue
Block a user