diff --git a/test/inductor/test_profiler.py b/test/inductor/test_profiler.py
index d3a661a0bd4d..75e6a7ed4076 100644
--- a/test/inductor/test_profiler.py
+++ b/test/inductor/test_profiler.py
@@ -13,7 +13,6 @@ from torch._inductor import config
 from torch.profiler import ProfilerActivity
 from torch.testing._internal.common_utils import TemporaryFileName
 from torch.testing._internal.inductor_utils import HAS_CUDA
-from torch.torch_version import TorchVersion
 from torch.utils._triton import has_triton
 
 
@@ -281,23 +280,6 @@ class DynamoProfilerTests(torch._inductor.test_case.TestCase):
         for e in triton_events:
             check_triton_event(e)
 
-    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
-    def test_cupti_lazy_reinit(self):
-        x, y = (torch.randn(4, 4, device="cuda") for _ in range(2))
-
-        def fn(x, y):
-            return (x + y).sin()
-
-        fn_c = torch.compile(fn, mode="reduce-overhead")
-
-        with torch.profiler.profile():
-            fn_c(x, y)
-
-        if TorchVersion(torch.version.cuda) >= "12.6":
-            self.assertEqual("0", os.environ.get("DISABLE_CUPTI_LAZY_REINIT", "0"))
-        else:
-            self.assertEqual("1", os.environ.get("DISABLE_CUPTI_LAZY_REINIT", "0"))
-
 
 if __name__ == "__main__":
     from torch._inductor.test_case import run_tests
diff --git a/torch/__init__.py b/torch/__init__.py
index 89e1540eb55c..5fc07487dfa1 100644
--- a/torch/__init__.py
+++ b/torch/__init__.py
@@ -54,7 +54,6 @@ from torch._utils import (
 from torch._utils_internal import (
     get_file_path,
     prepare_multiprocessing_environment,
-    profiler_allow_cudagraph_cupti_lazy_reinit_cuda12,
     USE_GLOBAL_DEPS,
     USE_RTLD_GLOBAL_WITH_LIBTORCH,
 )
@@ -2295,7 +2294,6 @@ class _TorchCompileInductorWrapper:
 
     def __init__(self, mode, options, dynamic):
         from torch._inductor.compiler_bisector import CompilerBisector
-        from torch.torch_version import TorchVersion
 
         self.config: dict[str, _Any] = {}
         self.dynamic = dynamic
@@ -2303,13 +2301,7 @@ class _TorchCompileInductorWrapper:
         self.apply_options(options)
         self.apply_options(CompilerBisector.get_config_change("inductor"))
 
-        if self.config.get("triton.cudagraphs", False) and (
-            (
-                getattr(torch.version, "cuda", None)
-                and TorchVersion(torch.version.cuda) < "12.6"
-            )
-            or not profiler_allow_cudagraph_cupti_lazy_reinit_cuda12()
-        ):
+        if self.config.get("triton.cudagraphs", False):
             os.environ["DISABLE_CUPTI_LAZY_REINIT"] = "1"
             # FIXME: CUDA Graph does not work well with CUPTI teardown.
             # 1) crashes on 1st lazy CUPTI re-init after teardown (CUDA 11)
diff --git a/torch/_utils_internal.py b/torch/_utils_internal.py
index b5e2fcca3e71..c6788e44bbad 100644
--- a/torch/_utils_internal.py
+++ b/torch/_utils_internal.py
@@ -274,7 +274,3 @@ def record_chromium_event_internal(
     event: dict[str, Any],
 ):
     return None
-
-
-def profiler_allow_cudagraph_cupti_lazy_reinit_cuda12():
-    return True
diff --git a/torch/profiler/profiler.py b/torch/profiler/profiler.py
index 22c9d78e6dc6..ecc848c20e7e 100644
--- a/torch/profiler/profiler.py
+++ b/torch/profiler/profiler.py
@@ -23,10 +23,8 @@ from torch._C._profiler import (
     _remove_execution_trace_observer,
 )
 from torch._environment import is_fbcode
-from torch._utils_internal import profiler_allow_cudagraph_cupti_lazy_reinit_cuda12
 from torch.autograd import kineto_available, ProfilerActivity
 from torch.profiler._memory_profiler import MemoryProfile, MemoryProfileTimeline
-from torch.torch_version import TorchVersion
 
 
 __all__ = [
@@ -225,13 +223,7 @@ class _KinetoProfile:
         if hasattr(torch, "_inductor"):
             import torch._inductor.config as inductor_config
 
-            if inductor_config.triton.cudagraphs and (
-                (
-                    getattr(torch.version, "cuda", None)
-                    and TorchVersion(torch.version.cuda) < "12.6"
-                )
-                or not profiler_allow_cudagraph_cupti_lazy_reinit_cuda12()
-            ):
+            if inductor_config.triton.cudagraphs:
                 os.environ["DISABLE_CUPTI_LAZY_REINIT"] = "1"
                 self.add_metadata_json("DISABLE_CUPTI_LAZY_REINIT", "1")
                 # FIXME: CUDA Graph does not work well with CUPTI teardown.
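Not part of the patch: a minimal sketch, modeled on the removed test_cupti_lazy_reinit (same shapes, compile mode, and env var), of how the post-patch behavior can be observed. After this change the env var is set whenever inductor cudagraphs is active, regardless of CUDA version; assumes a CUDA + Triton build.

# Hedged sketch (not part of the patch); names taken from the removed test.
import os
import torch

def fn(x, y):
    return (x + y).sin()

x, y = (torch.randn(4, 4, device="cuda") for _ in range(2))
fn_c = torch.compile(fn, mode="reduce-overhead")  # "reduce-overhead" enables triton.cudagraphs

with torch.profiler.profile():
    fn_c(x, y)

# With cudagraphs enabled, CUPTI lazy re-init is now disabled unconditionally.
print(os.environ.get("DISABLE_CUPTI_LAZY_REINIT", "0"))  # expected: "1"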