Revert "Inductor logging + analysis of torch.profile (#149697)"

This reverts commit 060838c2312ad207c7afe2c86f8a484afea5f328.

Reverted https://github.com/pytorch/pytorch/pull/149697 on behalf of https://github.com/clee2000 due to broke a bunch of tests internally D76299454, probably also broke rocm inductor/test_analysis.py::TestAnalysisCUDA::test_augment_trace_against_flop_counter_maxat0_cuda_float16 [GH job link](https://github.com/pytorch/pytorch/actions/runs/15545277599/job/43766911025) [HUD commit link](060838c231) ([comment](https://github.com/pytorch/pytorch/pull/149697#issuecomment-2959747153))
Author: PyTorch MergeBot
Date:   2025-06-10 15:38:40 +00:00
Parent: b44306d368
Commit: eb152ab1dd
19 changed files with 74 additions and 1881 deletions


@@ -22,14 +22,7 @@ import tempfile
 import textwrap
 import time
 import unittest
-from collections.abc import (
-    Collection,
-    Generator,
-    Iterator,
-    Mapping,
-    MutableMapping,
-    MutableSet,
-)
+from collections.abc import Collection, Iterator, Mapping, MutableMapping, MutableSet
 from datetime import datetime
 from io import StringIO
 from typing import (
@@ -58,7 +51,6 @@ from unittest import mock
 import sympy
 
 import torch
-from torch._inductor.analysis.device_info import datasheet_tops
 from torch._inductor.runtime.hints import DeviceProperties
 from torch.utils._ordered_set import OrderedSet
 from torch.utils._pytree import tree_map_only
@@ -2123,27 +2115,17 @@ def get_backend_num_stages() -> int:
 
 
 @functools.lru_cache(None)
-def get_device_tflops(dtype: torch.dtype) -> float:
-    """
-    We don't want to throw errors in this function. First check to see if the device is in device_info.py,
-    then fall back to the inaccurate triton estimation.
-    """
-    ds_tops = datasheet_tops(dtype, is_tf32=torch.backends.cuda.matmul.allow_tf32)
-    if ds_tops is not None:
-        return ds_tops
-
+def get_device_tflops(dtype: torch.dtype) -> int:
     from triton.testing import get_max_simd_tflops, get_max_tensorcore_tflops
 
-    from torch.testing._internal.common_cuda import SM80OrLater
-
     assert dtype in (torch.float16, torch.bfloat16, torch.float32)
 
     if inspect.signature(get_max_simd_tflops).parameters.get("clock_rate"):
         # Triton API change in https://github.com/triton-lang/triton/pull/2293
-        from torch._utils_internal import max_clock_rate_mhz
+        from torch._utils_internal import max_clock_rate
 
-        sm_clock = max_clock_rate_mhz()
-        if dtype in (torch.float16, torch.bfloat16) and SM80OrLater:
+        sm_clock = max_clock_rate()
+        if dtype in (torch.float16, torch.bfloat16):
             return get_max_tensorcore_tflops(dtype, sm_clock)
 
         if torch.backends.cuda.matmul.allow_tf32:
@@ -2151,7 +2133,7 @@ def get_device_tflops(dtype: torch.dtype) -> float:
         else:
             return get_max_simd_tflops(torch.float32, sm_clock)
     else:
-        if dtype in (torch.float16, torch.bfloat16) and SM80OrLater:
+        if dtype in (torch.float16, torch.bfloat16):
             return get_max_tensorcore_tflops(dtype)
         if torch.backends.cuda.matmul.allow_tf32:
@@ -3138,54 +3120,3 @@ def is_codegen_graph_partition_subgraph(wrapper: PythonWrapperCodegen) -> bool:
         isinstance(wrapper, SubgraphPythonWrapperCodegen)
         and wrapper.partition_signatures is not None
     )
-
-
-def tabulate_2d(elements: Sequence[Sequence[T]], headers: Sequence[T]) -> str:
-    widths = [len(str(e)) for e in headers]
-    for row in elements:
-        assert len(row) == len(headers)
-        for i, e in enumerate(row):
-            widths[i] = max(widths[i], len(str(e)))
-    lines = []
-    lines.append("|".join(f" {h:{w}} " for h, w in zip(headers, widths)))
-    # widths  whitespace  horizontal separators
-    total_width = sum(widths) + (len(widths) * 2) + (len(widths) - 1)
-    lines.append("-" * total_width)
-    for row in elements:
-        lines.append("|".join(f" {e:{w}} " for e, w in zip(row, widths)))
-    return "\n".join(lines)
-
-
-def zip_dicts(
-    dict1: Mapping[KeyType, ValType],
-    dict2: Mapping[KeyType, ValType],
-    d1_default: ValType | None = None,
-    d2_default: ValType | None = None,
-) -> Generator[tuple[KeyType, ValType | None, ValType | None], None, None]:
-    """
-    Zip two dictionaries together, replacing missing keys with default values.
-
-    Args:
-        dict1 (dict): The first dictionary.
-        dict2 (dict): The second dictionary.
-        d1_default (Any): the default value for the first dictionary
-        d2_default (Any): the default value for the second dictionary
-
-    Yields:
-        tuple: A tuple containing the key, the value from dict1 (or d1_default if missing),
-        and the value from dict2 (or d2_default if missing).
-    """
-    # Find the union of all keys
-    all_keys = OrderedSet(dict1.keys()) | OrderedSet(dict2.keys())
-
-    # Iterate over all keys
-    for key in all_keys:
-        # Get the values from both dictionaries, or default if missing
-        value1 = dict1.get(key)
-        value2 = dict2.get(key)
-
-        yield (
-            key,
-            value1 if value1 is not None else d1_default,
-            value2 if value2 is not None else d2_default,
-        )
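
For context, a minimal usage sketch of the two helpers deleted above. It is illustrative only: the sample dictionaries, headers, and timing numbers are invented, and it assumes tabulate_2d and zip_dicts are in scope (e.g. imported from torch._inductor.utils at a commit where this change was still applied).

# Hypothetical profiling data keyed by kernel name (not taken from the PR).
perf_a = {"aten::mm": 1.20, "aten::add": 0.05}
perf_b = {"aten::mm": 1.35, "aten::relu": 0.02}

# zip_dicts walks the union of keys and fills gaps with the given defaults,
# yielding tuples such as ("aten::add", 0.05, 0.0) and ("aten::relu", 0.0, 0.02).
rows = [
    (kernel, a_ms, b_ms)
    for kernel, a_ms, b_ms in zip_dicts(perf_a, perf_b, d1_default=0.0, d2_default=0.0)
]

# tabulate_2d renders the rows as a pipe-separated text table: a header row,
# a dashed separator sized to the padded columns, then one line per row.
print(tabulate_2d(rows, headers=["kernel", "run A (ms)", "run B (ms)"]))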