Revert "Inductor logging + analysis of torch.profile (#149697)"

This reverts commit 060838c2312ad207c7afe2c86f8a484afea5f328.

Reverted https://github.com/pytorch/pytorch/pull/149697 on behalf of https://github.com/clee2000 due to broke a bunch of tests internally D76299454, probably also broke rocm inductor/test_analysis.py::TestAnalysisCUDA::test_augment_trace_against_flop_counter_maxat0_cuda_float16 [GH job link](https://github.com/pytorch/pytorch/actions/runs/15545277599/job/43766911025) [HUD commit link](060838c231) ([comment](https://github.com/pytorch/pytorch/pull/149697#issuecomment-2959747153))
Author: PyTorch MergeBot
Date:   2025-06-10 15:38:40 +00:00
Parent: b44306d368
Commit: eb152ab1dd
19 changed files with 74 additions and 1881 deletions


@@ -22,14 +22,7 @@ import tempfile
 import textwrap
 import time
 import unittest
-from collections.abc import (
-    Collection,
-    Generator,
-    Iterator,
-    Mapping,
-    MutableMapping,
-    MutableSet,
-)
+from collections.abc import Collection, Iterator, Mapping, MutableMapping, MutableSet
 from datetime import datetime
 from io import StringIO
 from typing import (
@@ -58,7 +51,6 @@ from unittest import mock
 import sympy
 
 import torch
-from torch._inductor.analysis.device_info import datasheet_tops
 from torch._inductor.runtime.hints import DeviceProperties
 from torch.utils._ordered_set import OrderedSet
 from torch.utils._pytree import tree_map_only
@@ -2123,27 +2115,17 @@ def get_backend_num_stages() -> int:
 
 
 @functools.lru_cache(None)
-def get_device_tflops(dtype: torch.dtype) -> float:
-    """
-    We don't want to throw errors in this function. First check to see if the device is in device_info.py,
-    then fall back to the inaccurate triton estimation.
-    """
-    ds_tops = datasheet_tops(dtype, is_tf32=torch.backends.cuda.matmul.allow_tf32)
-    if ds_tops is not None:
-        return ds_tops
-
+def get_device_tflops(dtype: torch.dtype) -> int:
     from triton.testing import get_max_simd_tflops, get_max_tensorcore_tflops
 
-    from torch.testing._internal.common_cuda import SM80OrLater
-
     assert dtype in (torch.float16, torch.bfloat16, torch.float32)
 
     if inspect.signature(get_max_simd_tflops).parameters.get("clock_rate"):
         # Triton API change in https://github.com/triton-lang/triton/pull/2293
-        from torch._utils_internal import max_clock_rate_mhz
+        from torch._utils_internal import max_clock_rate
 
-        sm_clock = max_clock_rate_mhz()
-        if dtype in (torch.float16, torch.bfloat16) and SM80OrLater:
+        sm_clock = max_clock_rate()
+        if dtype in (torch.float16, torch.bfloat16):
             return get_max_tensorcore_tflops(dtype, sm_clock)
 
         if torch.backends.cuda.matmul.allow_tf32:
@@ -2151,7 +2133,7 @@ def get_device_tflops(dtype: torch.dtype) -> float:
         else:
             return get_max_simd_tflops(torch.float32, sm_clock)
     else:
-        if dtype in (torch.float16, torch.bfloat16) and SM80OrLater:
+        if dtype in (torch.float16, torch.bfloat16):
             return get_max_tensorcore_tflops(dtype)
         if torch.backends.cuda.matmul.allow_tf32:
@@ -3138,54 +3120,3 @@ def is_codegen_graph_partition_subgraph(wrapper: PythonWrapperCodegen) -> bool:
         isinstance(wrapper, SubgraphPythonWrapperCodegen)
         and wrapper.partition_signatures is not None
     )
-
-
-def tabulate_2d(elements: Sequence[Sequence[T]], headers: Sequence[T]) -> str:
-    widths = [len(str(e)) for e in headers]
-    for row in elements:
-        assert len(row) == len(headers)
-        for i, e in enumerate(row):
-            widths[i] = max(widths[i], len(str(e)))
-    lines = []
-    lines.append("|".join(f" {h:{w}} " for h, w in zip(headers, widths)))
-    # widths  whitespace  horizontal separators
-    total_width = sum(widths) + (len(widths) * 2) + (len(widths) - 1)
-    lines.append("-" * total_width)
-    for row in elements:
-        lines.append("|".join(f" {e:{w}} " for e, w in zip(row, widths)))
-    return "\n".join(lines)
-
-
-def zip_dicts(
-    dict1: Mapping[KeyType, ValType],
-    dict2: Mapping[KeyType, ValType],
-    d1_default: ValType | None = None,
-    d2_default: ValType | None = None,
-) -> Generator[tuple[KeyType, ValType | None, ValType | None], None, None]:
-    """
-    Zip two dictionaries together, replacing missing keys with default values.
-
-    Args:
-        dict1 (dict): The first dictionary.
-        dict2 (dict): The second dictionary.
-        d1_default (Any): the default value for the first dictionary
-        d2_default (Any): the default value for the second dictionary
-
-    Yields:
-        tuple: A tuple containing the key, the value from dict1 (or d1_default if missing),
-        and the value from dict2 (or d2_default if missing).
-    """
-    # Find the union of all keys
-    all_keys = OrderedSet(dict1.keys()) | OrderedSet(dict2.keys())
-
-    # Iterate over all keys
-    for key in all_keys:
-        # Get the values from both dictionaries, or default if missing
-        value1 = dict1.get(key)
-        value2 = dict2.get(key)
-
-        yield (
-            key,
-            value1 if value1 is not None else d1_default,
-            value2 if value2 is not None else d2_default,
-        )
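
For context, a minimal usage sketch of the two helpers deleted above. It is illustrative only: the sample dictionaries, headers, and timing numbers are invented, and it assumes tabulate_2d and zip_dicts are in scope (e.g. imported from torch._inductor.utils at a commit where this change was still applied).

# Hypothetical profiling data keyed by kernel name (not taken from the PR).
perf_a = {"aten::mm": 1.20, "aten::add": 0.05}
perf_b = {"aten::mm": 1.35, "aten::relu": 0.02}

# zip_dicts walks the union of keys and fills gaps with the given defaults,
# yielding tuples such as ("aten::add", 0.05, 0.0) and ("aten::relu", 0.0, 0.02).
rows = [
    (kernel, a_ms, b_ms)
    for kernel, a_ms, b_ms in zip_dicts(perf_a, perf_b, d1_default=0.0, d2_default=0.0)
]

# tabulate_2d renders the rows as a pipe-separated text table: a header row,
# a dashed separator sized to the padded columns, then one line per row.
print(tabulate_2d(rows, headers=["kernel", "run A (ms)", "run B (ms)"]))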