[CI] Disable some tests that are failing in periodic (#150059)
Disabling some tests to restore periodic

nogpu avx512 timeout: 59f14d19ae (38492953496-box)
profiler failure: 7ae0ce6360 (38461255009-box)
test_accelerator failure: 87bfd66c3c (39476723746-box) origin: 146098
test_overrides failure: bf752c36da (39484562957-box) origin: 146098
inductor cpu repro: bb9c426024 (38447525659-box)
functorch eager transforms: 8f858e226b (39488068620-box) f2cea01f71 (39555064878) b5281a4a18 (39599355600) either 148288 or 148261? 2ec9aceaeb/1

Pull Request resolved: https://github.com/pytorch/pytorch/pull/150059
Approved by: https://github.com/ZainRizvi, https://github.com/atalman, https://github.com/malfet
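All of the test-file changes below apply the same fix: a conditional skip that disables the affected test only under the failing CI configuration and leaves it active everywhere else. A minimal runnable sketch of that pattern, with a hypothetical flag and test standing in for the real PyTorch ones:

import os
import unittest

# Hypothetical stand-in for a build/CI flag such as TEST_CUDA_MEM_LEAK_CHECK;
# the real flags come from torch.testing._internal.common_utils.
MEM_LEAK_CHECK = os.getenv("PYTORCH_TEST_CUDA_MEM_LEAK_CHECK", "0") == "1"


class ExampleTests(unittest.TestCase):
    @unittest.skipIf(
        MEM_LEAK_CHECK,
        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
    )
    def test_flaky_under_leak_check(self):
        # Runs everywhere except in the mem-leak-check configuration.
        self.assertEqual(2 + 2, 4)


if __name__ == "__main__":
    unittest.main()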
This commit is contained in:
committed by PyTorch MergeBot
parent 2bd5bfa3ce
commit 9092dd2e82
.github/workflows/periodic.yml (vendored, 5 lines changed)
@@ -59,8 +59,9 @@ jobs:
       docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11
       test-matrix: |
         { include: [
-          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
test/functorch/test_eager_transforms.py

@@ -74,6 +74,7 @@ from torch.testing._internal.common_utils import (
     skipIfRocm,
     skipIfTorchDynamo,
     subtest,
+    TEST_CUDA_MEM_LEAK_CHECK,
     TEST_WITH_TORCHDYNAMO,
     TestCase,
     xfailIfTorchDynamo,
@@ -2865,6 +2866,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_jvp, expected_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_return(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -2879,6 +2884,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_jvp, expected_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_vmap(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 3, 1), device=device, dtype=dtype)
@@ -2897,6 +2906,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_grad(self, device, dtype):
         x_p = make_tensor((3,), device=device, dtype=dtype)
         x_t = make_tensor((3,), device=device, dtype=dtype)
@@ -2916,6 +2929,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_nested_input_nested_output(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -5151,6 +5168,10 @@ class TestCompileTransforms(TestCase):
     # torch.compile is not supported on Windows CUDA.
     # Triton only supports GPU with SM70 or later.
     @expectedFailureIf((IS_WINDOWS and TEST_CUDA) or (TEST_CUDA and not SM70OrLater))
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_compile_vmap_hessian(self, device):
         # The model and inputs are a smaller version
         # of code at benchmark repo:
test/inductor/test_cpu_repro.py

@@ -4149,6 +4149,10 @@ class CPUReproTests(TestCase):
             "__at_align__ std::array", 0, exactly=True
         ).run(code)

+    @unittest.skipIf(
+        os.getenv("ATEN_CPU_CAPABILITY") == "default",
+        "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
+    )
     def test_group_norm_large_input(self):
         class M(torch.nn.Module):
             def __init__(self) -> None:
@@ -4178,6 +4182,10 @@ class CPUReproTests(TestCase):
         # check for parallel reduction.
         self.assertEqual(metrics.parallel_reduction_count, 1)

+    @unittest.skipIf(
+        os.getenv("ATEN_CPU_CAPABILITY") == "default",
+        "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
+    )
     def test_group_norm_large_size(self):
         # https://github.com/pytorch/pytorch/issues/141541
         # We are using the chunk size of 4096 for cascade summation,
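A note on the condition used in both skips above: the periodic nogpu_NO_AVX2 job is assumed to export ATEN_CPU_CAPABILITY=default, which pins ATen to its baseline, pre-AVX2 CPU kernels, so the decorator skips exactly that job. The gate can also be built once and reused; a small sketch:

import os
import unittest

# Reusable decorator matching the condition in the diff above; "default"
# is assumed to be the capability the nogpu_NO_AVX2 job exports.
skip_on_baseline_cpu = unittest.skipIf(
    os.getenv("ATEN_CPU_CAPABILITY") == "default",
    "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
)


class ExampleCpuTests(unittest.TestCase):
    @skip_on_baseline_cpu
    def test_needs_vectorized_kernels(self):
        self.assertTrue(True)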
test/profiler/test_profiler.py

@@ -2169,6 +2169,10 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
     @unittest.skipIf(not kineto_available(), "Kineto is required")
+    @unittest.skipIf(
+        "RelWithAssert" in torch.__config__.show(),
+        "failing in debug build, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_profile_all_threads(self):
         profiling_started = threading.Event()
         profiling_ended = threading.Event()
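The profiler skip above keys off the build type rather than an environment variable: torch.__config__.show() returns a human-readable string describing how the running torch binary was configured, so substring-matching it is a cheap way to detect a RelWithAssert (assertions-enabled) build. A minimal sketch:

import torch

# torch.__config__.show() reports build type, compiler, and feature flags
# for the current binary as one string.
build_config = torch.__config__.show()
IS_RELWITHASSERT_BUILD = "RelWithAssert" in build_config

print("RelWithAssert build:", IS_RELWITHASSERT_BUILD)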
test/test_accelerator.py

@@ -10,6 +10,8 @@ from torch.testing._internal.common_utils import NoTest, run_tests, TEST_MPS, TestCase
 if not torch.accelerator.is_available():
     print("No available accelerator detected, skipping tests", file=sys.stderr)
     TestCase = NoTest  # noqa: F811
+    # Skip because failing when run on cuda build with no GPU, see #150059 for example
+    sys.exit()

 TEST_MULTIACCELERATOR = torch.accelerator.device_count() > 1
test/test_overrides.py

@@ -1,5 +1,6 @@
 # Owner(s): ["module: __torch_function__"]

+import sys
 import torch
 import numpy as np
 import inspect
@@ -9,6 +10,7 @@ import pickle
 import collections
 import unittest
 import contextlib
+import os

 from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_CROSSREF, TEST_WITH_TORCHDYNAMO
 from torch.overrides import (
@@ -29,6 +31,14 @@ from torch.utils._pytree import tree_map

 Tensor = torch.Tensor

+if os.getenv("ATEN_CPU_CAPABILITY") in ("default", "avx2"):
+    # This test is not supported on ARM
+    print(
+        "Skipping due to failing when cuda build runs on non cuda machine, "
+        + "see https://github.com/pytorch/pytorch/pull/150059 for example"
+    )
+    sys.exit()
+
 # The functions below simulate the pure-python torch functions in the
 # torch.functional namespace. We use examples local to this file rather
 # than any of the real examples implemented in Python since in the
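Unlike the decorator-based skips, test_accelerator.py and test_overrides.py bail out at import time: they print a reason and call sys.exit() before any tests are defined (test_accelerator.py also rebinds TestCase to NoTest so nothing would be collected). A self-contained sketch of that module-level pattern, using a hypothetical gate condition:

import os
import sys
import unittest

# Hypothetical gate; the real files key off torch.accelerator availability
# or the ATEN_CPU_CAPABILITY environment variable.
if os.getenv("EXAMPLE_UNSUPPORTED_PLATFORM") == "1":
    print(
        "Skipping entire module, "
        "see https://github.com/pytorch/pytorch/pull/150059 for example",
        file=sys.stderr,
    )
    sys.exit()


class ExampleTests(unittest.TestCase):
    def test_runs_only_on_supported_platforms(self):
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()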