Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "Fix skipIfXpu and skipIfHpu disables tests when used on class (#151315)"
This reverts commit ee096b89f63394b2c18826288783eef241f3959c. Reverted https://github.com/pytorch/pytorch/pull/151315 on behalf of https://github.com/jeanschmidt because it seems to have introduced internal regressions; see [D74668899](https://www.internalfb.com/diff/D74668899). @malfet, could you help the author get this PR merged? ([comment](https://github.com/pytorch/pytorch/pull/151315#issuecomment-2880203323))
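
For context on what the reverted PR changed: a function-style skip decorator, when applied to a TestCase class, replaces the class with a plain wrapper function, so the test runner silently drops its tests instead of skipping them — the failure mode the PR title refers to. The PR swapped such class-level uses for `unittest.skipIf`, which this commit undoes. A minimal sketch of the difference (`broken_skip`, the `TEST_HPU` stand-in flag, and both sample classes below are hypothetical illustrations, not PyTorch's actual decorators):

    import unittest

    TEST_HPU = False  # stand-in for the real platform flag

    def broken_skip(fn):
        # A function-style decorator: applied to a class, it rebinds the class
        # name to this wrapper function, so unittest no longer discovers any
        # of its tests -- they are disabled rather than skipped.
        def wrapper(*args, **kwargs):
            if TEST_HPU:
                raise unittest.SkipTest("Unsupported on HPU")
            return fn(*args, **kwargs)
        return wrapper

    @broken_skip
    class SilentlyDisabledTests(unittest.TestCase):  # now a function, not a class
        def test_add(self):
            self.assertEqual(1 + 1, 2)

    @unittest.skipIf(TEST_HPU, "Unsupported on HPU")
    class ProperlySkippedTests(unittest.TestCase):  # still a discoverable class
        def test_add(self):
            self.assertEqual(1 + 1, 2)

    if __name__ == "__main__":
        # Only ProperlySkippedTests.test_add runs (or skips); the first
        # class's test is silently gone.
        unittest.main()
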
@@ -3,7 +3,6 @@
 import sys
 import unittest
 from functools import partial, wraps
-from unittest.mock import patch

 import torch
 import torch.distributed as dist
@@ -11,7 +10,6 @@ import torch.distributed._functional_collectives as ft_c
 import torch.distributed.distributed_c10d as c10d
 import torch.distributed.tensor as dt
 from functorch import make_fx
-from torch._dynamo.metrics_context import MetricsContext
 from torch._inductor.utils import run_and_get_code
 from torch.testing import FileCheck
 from torch.testing._internal.common_device_type import instantiate_device_type_tests
@@ -33,6 +31,7 @@ from torch.testing._internal.common_utils import (
     instantiate_parametrized_tests,
     parametrize,
     run_tests,
+    skipIfHpu,
     TEST_CUDA,
     TEST_HPU,
     TestCase,
@@ -91,7 +90,7 @@ def new_subgroups(group_size: int, pg_tag=None):
     return cur_subgroup, subgroups


-@unittest.skipIf(TEST_HPU, "Unsupported on HPU")
+@skipIfHpu
 class TestExpand(MultiThreadedTestCase):
     @property
     def world_size(self):
@@ -181,7 +180,7 @@ class TestExpand(MultiThreadedTestCase):
         self.assertEqual(2, group_size)


-@unittest.skipIf(TEST_HPU, "Unsupported on HPU")
+@skipIfHpu
 class TestPgTag(MultiThreadedTestCase):
     @property
     def world_size(self):
@@ -258,7 +257,7 @@ class TestPgTag(MultiThreadedTestCase):


 @instantiate_parametrized_tests
-@unittest.skipIf(TEST_HPU, "Unsupported on HPU")
+@skipIfHpu
 class TestTraceableCollectives(MultiThreadedTestCase):
     @property
     def world_size(self):
@@ -404,7 +403,7 @@ class TestMetaCollectives(TestCase):
         self.assertEqual(x.size(), out.size())


-@unittest.skipIf(TEST_HPU, "Unsupported on HPU")
+@skipIfHpu
 class TestGradCollectives(MultiThreadedTestCase):
     @property
     def world_size(self):
@@ -657,7 +656,7 @@ class TestDistributedBackendCollectivesWithWorldSize4(


 @instantiate_parametrized_tests
-@unittest.skipIf(TEST_HPU, "Unsupported on HPU")
+@skipIfHpu
 class TestFunctionalAutograd(MultiThreadedTestCase):
     def setUp(self):
         super().setUp()
@@ -667,13 +666,6 @@ class TestFunctionalAutograd(MultiThreadedTestCase):
     def world_size(self):
         return 2

-    # `compilation_metric` attempts to update the `is_forward` field of `metrics_context`. Since
-    # `metrics_context` is a singleton, a runtime error will occur if multiple threads try to update it
-    # because `MetricsContext` does not allow updating existing fields when `overwrite` is False.
-    # So, we need to patch the `update` function of MetricsContext
-    def _metrics_context_update(self, *args, **kwargs) -> None:
-        pass
-
     @parametrize("compile", [True, False])
     def test_all_to_all_single(self, compile: bool = True) -> None:
         group = dist.group.WORLD.group_name
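
The comment block removed above explains why the test had to patch `MetricsContext.update`: the metrics context is a process-wide singleton that refuses to overwrite an already-set field unless `overwrite` is passed, so multiple worker threads in a `MultiThreadedTestCase` each setting `is_forward` would raise. A rough sketch of that behavior under assumed semantics (`MetricsContextSketch` is illustrative, not the real `torch._dynamo.metrics_context.MetricsContext`):

    from threading import Lock
    from unittest.mock import patch

    class MetricsContextSketch:
        # Illustrative stand-in for the singleton metrics context.
        def __init__(self):
            self._metrics = {}
            self._lock = Lock()

        def update(self, values, overwrite=False):
            with self._lock:
                for key, value in values.items():
                    if key in self._metrics and not overwrite:
                        raise RuntimeError(f"Metric '{key}' has already been set")
                    self._metrics[key] = value

    ctx = MetricsContextSketch()
    ctx.update({"is_forward": True})
    try:
        ctx.update({"is_forward": False})  # a second writer: raises
    except RuntimeError as exc:
        print(exc)

    # The removed test helper neutralized exactly this, in the spirit of:
    #     with patch.object(MetricsContext, "update", lambda self, *a, **k: None):
    #         loss.backward()
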
@@ -699,8 +691,7 @@ class TestFunctionalAutograd(MultiThreadedTestCase):
         self.assertIsNotNone(out.grad_fn)
         self.assertTrue(out.requires_grad)
         loss = out.sum()
-        with patch.object(MetricsContext, "update", self._metrics_context_update):
-            loss.backward()
+        loss.backward()
         self.assertEqual(t.grad, torch.full_like(t, 2.0))

     def test_all_to_all_single_inductor(self) -> None:
@@ -720,8 +711,7 @@ class TestFunctionalAutograd(MultiThreadedTestCase):

         def run_with_backward():
             out = compiled(t, self.world_size)
-            with patch.object(MetricsContext, "update", self._metrics_context_update):
-                out.backward()
+            out.backward()

         _, codes = run_and_get_code(run_with_backward)
         for code in codes:
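
The hunk above keeps the `run_and_get_code` pattern: compile a function, run it, and capture the Inductor-generated source so the test can grep it, typically with `FileCheck`. A small self-contained sketch of that pattern (the toy function and the `"def call"` check string are assumptions for illustration, not taken from this test):

    import torch
    from torch._inductor.utils import run_and_get_code
    from torch.testing import FileCheck

    def fn(x):
        return (x + 1).sum()

    compiled = torch.compile(fn)
    # run_and_get_code returns the function's result plus the generated
    # source-code strings for each compiled graph.
    result, codes = run_and_get_code(compiled, torch.ones(4))
    for code in codes:
        FileCheck().check("def call").run(code)
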
@@ -761,8 +751,7 @@ class TestFunctionalAutograd(MultiThreadedTestCase):
         gathered_tensor = compiled(local_tensor, dim)
         self.assertEqual(gathered_tensor, torch.ones(output_size))

-        with patch.object(MetricsContext, "update", self._metrics_context_update):
-            gathered_tensor.sum().backward()
+        gathered_tensor.sum().backward()
         self.assertEqual(
             local_tensor.grad,
             torch.full((3, 3, 3), fill_value=float(self.world_size)),
@@ -797,8 +786,7 @@ class TestFunctionalAutograd(MultiThreadedTestCase):
         rs_tensor = compiled(input_tensor, dim)
         res_num = 1 * group_size
         self.assertEqual(rs_tensor, torch.ones(input_size) * res_num)
-        with patch.object(MetricsContext, "update", self._metrics_context_update):
-            rs_tensor.sum().backward()
+        rs_tensor.sum().backward()
         self.assertEqual(input_tensor.grad, torch.full(output_size, fill_value=1.0))


@@ -4,22 +4,17 @@ import unittest

 import torch
 import torch._inductor.config as inductor_config
+from torch._dynamo.device_interface import get_interface_for_device
 from torch._inductor.autoheuristic.autoheuristic import AutoHeuristic, LocalFeedback
 from torch._inductor.autoheuristic.autoheuristic_utils import AHContext
 from torch._inductor.runtime.runtime_utils import cache_dir
 from torch._inductor.test_case import run_tests, TestCase
 from torch._inductor.utils import get_gpu_shared_memory
-from torch.testing._internal.common_utils import TEST_XPU
-from torch.testing._internal.inductor_utils import (
-    GPU_TYPE,
-    HAS_CUDA,
-    HAS_GPU,
-    IS_A100,
-    IS_H100,
-)
+from torch.testing._internal.common_utils import skipIfXpu
+from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU, IS_A100, IS_H100


-@unittest.skipIf(TEST_XPU, "AutoHeuristic doesn't currently work on the XPU stack")
+@skipIfXpu(msg="AutoHeuristic doesn't currently work on the XPU stack")
 class AutoHeuristicTest(TestCase):
     def count_lines_in_file(self, file_path):
         with open(file_path) as file:
@@ -107,9 +102,7 @@
         self.assertEqual(num_lines, 5)

         shared_memory = get_gpu_shared_memory()
-
-        self.assertTrue(HAS_CUDA)
-        (fst, snd) = torch.cuda.get_device_capability()
+        (fst, snd) = get_interface_for_device(GPU_TYPE).get_device_capability()

         with open(path) as file:
             lines = file.readlines()
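
The restored line above queries the device capability through the generic device-interface layer rather than the CUDA-specific API; both calls appear in the diff itself. A hedged sketch of the difference, assuming a CUDA machine (`"cuda"` here plays the role of `GPU_TYPE`, which could equally be another device-type string such as `"xpu"`):

    import torch
    from torch._dynamo.device_interface import get_interface_for_device

    if torch.cuda.is_available():
        # CUDA-only form, as in the removed lines:
        major, minor = torch.cuda.get_device_capability()
        # Device-generic form, as in the restored line: resolve the backend
        # interface from a device-type string, then ask it the same question.
        iface = get_interface_for_device("cuda")
        assert (major, minor) == iface.get_device_capability()
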
@@ -158,7 +151,6 @@
         fx_graph_cache=False,
         fx_graph_remote_cache=False,
     )
-    @unittest.skipIf(not IS_A100, "heuristic only run on A100")
     def test_global_feedback(self):
         self.run_mixed_mm()
         path = self.get_path_to_autoheuristic_log("mixed_mm")
@@ -6,11 +6,11 @@ import torch
 from torch._inductor.runtime.benchmarking import benchmarker
 from torch._inductor.test_case import run_tests, TestCase
 from torch._inductor.utils import run_and_get_code
-from torch.testing._internal.common_utils import TEST_XPU
+from torch.testing._internal.common_utils import skipIfXpu
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU


-@unittest.skipIf(TEST_XPU, "Segmentation fault on CI machine")
+@skipIfXpu(msg="Segmentation fault on CI machine")
 class B2BGEMMTest(TestCase):
     device = GPU_TYPE

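
Note the two spellings restored across these files: bare `@skipIfHpu` in the distributed test and `@skipIfXpu(msg="...")` here. Decorators that accept an optional message are commonly written to support both call forms; a generic sketch of that pattern (`skip_if_xpu` and the `TEST_XPU` stand-in are an assumed implementation, not the actual `skipIfXpu` source), keeping in mind that this function-style form is exactly what the PR title says misbehaves on classes, as sketched after the commit message above:

    import functools
    import unittest

    TEST_XPU = False  # stand-in flag

    def skip_if_xpu(func=None, *, msg="test doesn't currently work on the XPU stack"):
        def decorator(fn):
            @functools.wraps(fn)
            def wrapper(*args, **kwargs):
                if TEST_XPU:
                    raise unittest.SkipTest(msg)
                return fn(*args, **kwargs)
            return wrapper
        if func is not None:
            return decorator(func)  # used bare: @skip_if_xpu
        return decorator            # used with args: @skip_if_xpu(msg="...")
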
@@ -2,7 +2,6 @@
 import copy
 import os
 import random
-import unittest

 import torch
 from torch import nn
@@ -10,7 +9,7 @@ from torch._dynamo.utils import same
 from torch._inductor import config
 from torch._inductor.test_case import run_tests, TestCase
 from torch.testing._internal.common_cuda import tf32_off
-from torch.testing._internal.common_utils import TEST_XPU
+from torch.testing._internal.common_utils import skipIfXpu
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU

@@ -35,7 +34,7 @@ class Model2Conv(nn.Module):
         return (torch.rand(2, 3, 16, 16),)


-@unittest.skipIf(TEST_XPU, "ccl doesn't currently work on the XPU stack")
+@skipIfXpu(msg="ccl doesn't currently work on the XPU stack")
 class TestLayoutOptim(TestCase):
     @classmethod
     def setUpClass(cls):