mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Revert "[profiler] don't disable CUPTI_LAZY_REINIT for cuda >= 12.6 (#150957)"
This reverts commit 37812009fd123d5c4a038ce798eedd4a89eeffad.

Reverted https://github.com/pytorch/pytorch/pull/150957 on behalf of https://github.com/facebook-github-bot due to Diff reverted internally ([comment](https://github.com/pytorch/pytorch/pull/150957#issuecomment-2795878848))
This commit is contained in:
@ -13,7 +13,6 @@ from torch._inductor import config
|
||||
from torch.profiler import ProfilerActivity
|
||||
from torch.testing._internal.common_utils import TemporaryFileName
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
from torch.torch_version import TorchVersion
|
||||
from torch.utils._triton import has_triton
|
||||
|
||||
|
||||
@ -281,23 +280,6 @@ class DynamoProfilerTests(torch._inductor.test_case.TestCase):
|
||||
for e in triton_events:
|
||||
check_triton_event(e)
|
||||
|
||||
@unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
|
||||
def test_cupti_lazy_reinit(self):
|
||||
x, y = (torch.randn(4, 4, device="cuda") for _ in range(2))
|
||||
|
||||
def fn(x, y):
|
||||
return (x + y).sin()
|
||||
|
||||
fn_c = torch.compile(fn, mode="reduce-overhead")
|
||||
|
||||
with torch.profiler.profile():
|
||||
fn_c(x, y)
|
||||
|
||||
if TorchVersion(torch.version.cuda) >= "12.6":
|
||||
self.assertEqual("0", os.environ.get("DISABLE_CUPTI_LAZY_REINIT", "0"))
|
||||
else:
|
||||
self.assertEqual("1", os.environ.get("DISABLE_CUPTI_LAZY_REINIT", "0"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from torch._inductor.test_case import run_tests
|
||||
|
@ -54,7 +54,6 @@ from torch._utils import (
|
||||
from torch._utils_internal import (
|
||||
get_file_path,
|
||||
prepare_multiprocessing_environment,
|
||||
profiler_allow_cudagraph_cupti_lazy_reinit_cuda12,
|
||||
USE_GLOBAL_DEPS,
|
||||
USE_RTLD_GLOBAL_WITH_LIBTORCH,
|
||||
)
|
||||
@ -2295,7 +2294,6 @@ class _TorchCompileInductorWrapper:
|
||||
|
||||
def __init__(self, mode, options, dynamic):
|
||||
from torch._inductor.compiler_bisector import CompilerBisector
|
||||
from torch.torch_version import TorchVersion
|
||||
|
||||
self.config: dict[str, _Any] = {}
|
||||
self.dynamic = dynamic
|
||||
@ -2303,13 +2301,7 @@ class _TorchCompileInductorWrapper:
|
||||
self.apply_options(options)
|
||||
self.apply_options(CompilerBisector.get_config_change("inductor"))
|
||||
|
||||
if self.config.get("triton.cudagraphs", False) and (
|
||||
(
|
||||
getattr(torch.version, "cuda", None)
|
||||
and TorchVersion(torch.version.cuda) < "12.6"
|
||||
)
|
||||
or not profiler_allow_cudagraph_cupti_lazy_reinit_cuda12()
|
||||
):
|
||||
if self.config.get("triton.cudagraphs", False):
|
||||
os.environ["DISABLE_CUPTI_LAZY_REINIT"] = "1"
|
||||
# FIXME: CUDA Graph does not work well with CUPTI teardown.
|
||||
# 1) crashes on 1st lazy CUPTI re-init after teardown (CUDA 11)
|
||||
|
@ -274,7 +274,3 @@ def record_chromium_event_internal(
|
||||
event: dict[str, Any],
|
||||
):
|
||||
return None
|
||||
|
||||
|
||||
def profiler_allow_cudagraph_cupti_lazy_reinit_cuda12():
|
||||
return True
|
||||
|
@ -23,10 +23,8 @@ from torch._C._profiler import (
|
||||
_remove_execution_trace_observer,
|
||||
)
|
||||
from torch._environment import is_fbcode
|
||||
from torch._utils_internal import profiler_allow_cudagraph_cupti_lazy_reinit_cuda12
|
||||
from torch.autograd import kineto_available, ProfilerActivity
|
||||
from torch.profiler._memory_profiler import MemoryProfile, MemoryProfileTimeline
|
||||
from torch.torch_version import TorchVersion
|
||||
|
||||
|
||||
__all__ = [
|
||||
@ -225,13 +223,7 @@ class _KinetoProfile:
|
||||
if hasattr(torch, "_inductor"):
|
||||
import torch._inductor.config as inductor_config
|
||||
|
||||
if inductor_config.triton.cudagraphs and (
|
||||
(
|
||||
getattr(torch.version, "cuda", None)
|
||||
and TorchVersion(torch.version.cuda) < "12.6"
|
||||
)
|
||||
or not profiler_allow_cudagraph_cupti_lazy_reinit_cuda12()
|
||||
):
|
||||
if inductor_config.triton.cudagraphs:
|
||||
os.environ["DISABLE_CUPTI_LAZY_REINIT"] = "1"
|
||||
self.add_metadata_json("DISABLE_CUPTI_LAZY_REINIT", "1")
|
||||
# FIXME: CUDA Graph does not work well with CUPTI teardown.
|
||||
|
Reference in New Issue
Block a user