Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "Inductor logging + analysis of torch.profile (#149697)"
This reverts commit 060838c2312ad207c7afe2c86f8a484afea5f328.
Reverted https://github.com/pytorch/pytorch/pull/149697 on behalf of https://github.com/clee2000 due to breaking a bunch of tests internally (D76299454); it probably also broke the ROCm test inductor/test_analysis.py::TestAnalysisCUDA::test_augment_trace_against_flop_counter_maxat0_cuda_float16. [GH job link](https://github.com/pytorch/pytorch/actions/runs/15545277599/job/43766911025) [HUD commit link](060838c231) ([comment](https://github.com/pytorch/pytorch/pull/149697#issuecomment-2959747153))
@@ -22,14 +22,7 @@ import tempfile
 import textwrap
 import time
 import unittest
-from collections.abc import (
-    Collection,
-    Generator,
-    Iterator,
-    Mapping,
-    MutableMapping,
-    MutableSet,
-)
+from collections.abc import Collection, Iterator, Mapping, MutableMapping, MutableSet
 from datetime import datetime
 from io import StringIO
 from typing import (
@@ -58,7 +51,6 @@ from unittest import mock
 import sympy
 
 import torch
-from torch._inductor.analysis.device_info import datasheet_tops
 from torch._inductor.runtime.hints import DeviceProperties
 from torch.utils._ordered_set import OrderedSet
 from torch.utils._pytree import tree_map_only
@@ -2123,27 +2115,17 @@ def get_backend_num_stages() -> int:
 
 
 @functools.lru_cache(None)
-def get_device_tflops(dtype: torch.dtype) -> float:
-    """
-    We don't want to throw errors in this function. First check to see if the device is in device_info.py,
-    then fall back to the inaccurate triton estimation.
-    """
-    ds_tops = datasheet_tops(dtype, is_tf32=torch.backends.cuda.matmul.allow_tf32)
-    if ds_tops is not None:
-        return ds_tops
-
+def get_device_tflops(dtype: torch.dtype) -> int:
     from triton.testing import get_max_simd_tflops, get_max_tensorcore_tflops
 
-    from torch.testing._internal.common_cuda import SM80OrLater
-
     assert dtype in (torch.float16, torch.bfloat16, torch.float32)
 
     if inspect.signature(get_max_simd_tflops).parameters.get("clock_rate"):
         # Triton API change in https://github.com/triton-lang/triton/pull/2293
-        from torch._utils_internal import max_clock_rate_mhz
+        from torch._utils_internal import max_clock_rate
 
-        sm_clock = max_clock_rate_mhz()
-        if dtype in (torch.float16, torch.bfloat16) and SM80OrLater:
+        sm_clock = max_clock_rate()
+        if dtype in (torch.float16, torch.bfloat16):
             return get_max_tensorcore_tflops(dtype, sm_clock)
 
         if torch.backends.cuda.matmul.allow_tf32:
@@ -2151,7 +2133,7 @@ def get_device_tflops(dtype: torch.dtype) -> float:
         else:
             return get_max_simd_tflops(torch.float32, sm_clock)
     else:
-        if dtype in (torch.float16, torch.bfloat16) and SM80OrLater:
+        if dtype in (torch.float16, torch.bfloat16):
             return get_max_tensorcore_tflops(dtype)
 
         if torch.backends.cuda.matmul.allow_tf32:
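For context, a minimal usage sketch of the helper touched above. This is a sketch only: it assumes a CUDA build with Triton installed and that the helper remains importable from torch._inductor.utils; it is an internal Inductor utility, not public API.

```python
# Illustrative only: an internal Inductor helper that estimates peak device
# throughput (TFLOPS) for a dtype; it needs a CUDA device and Triton present.
import torch
from torch._inductor.utils import get_device_tflops

if torch.cuda.is_available():
    fp16_tflops = get_device_tflops(torch.float16)  # tensor-core estimate
    fp32_tflops = get_device_tflops(torch.float32)  # SIMD or TF32 estimate
    print(f"fp16 ~{fp16_tflops} TFLOPS, fp32 ~{fp32_tflops} TFLOPS")
```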
@@ -3138,54 +3120,3 @@ def is_codegen_graph_partition_subgraph(wrapper: PythonWrapperCodegen) -> bool:
         isinstance(wrapper, SubgraphPythonWrapperCodegen)
         and wrapper.partition_signatures is not None
     )
-
-
-def tabulate_2d(elements: Sequence[Sequence[T]], headers: Sequence[T]) -> str:
-    widths = [len(str(e)) for e in headers]
-    for row in elements:
-        assert len(row) == len(headers)
-        for i, e in enumerate(row):
-            widths[i] = max(widths[i], len(str(e)))
-    lines = []
-    lines.append("|".join(f" {h:{w}} " for h, w in zip(headers, widths)))
-    # widths whitespace horizontal separators
-    total_width = sum(widths) + (len(widths) * 2) + (len(widths) - 1)
-    lines.append("-" * total_width)
-    for row in elements:
-        lines.append("|".join(f" {e:{w}} " for e, w in zip(row, widths)))
-    return "\n".join(lines)
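A quick sketch of how the tabulate_2d helper that this revert deletes behaves, assuming the definition above is in scope; the sample kernel names and timings are made up for illustration.

```python
# Assumes tabulate_2d from the diff above; data below is purely illustrative.
headers = ["kernel", "calls", "total_us"]
rows = [
    ["triton_poi_fused_add_0", 3, 12.5],
    ["triton_red_fused_sum_1", 1, 118.0],
]
print(tabulate_2d(rows, headers))
# Prints a pipe-separated table: a header row, a dashed separator sized to the
# total column width, then one padded row per entry in `rows`.
```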
-
-
-def zip_dicts(
-    dict1: Mapping[KeyType, ValType],
-    dict2: Mapping[KeyType, ValType],
-    d1_default: ValType | None = None,
-    d2_default: ValType | None = None,
-) -> Generator[tuple[KeyType, ValType | None, ValType | None], None, None]:
-    """
-    Zip two dictionaries together, replacing missing keys with default values.
-
-    Args:
-        dict1 (dict): The first dictionary.
-        dict2 (dict): The second dictionary.
-        d1_default (Any): the default value for the first dictionary
-        d2_default (Any): the default value for the second dictionary
-
-    Yields:
-        tuple: A tuple containing the key, the value from dict1 (or d1_default if missing),
-        and the value from dict2 (or d2_default if missing).
-    """
-    # Find the union of all keys
-    all_keys = OrderedSet(dict1.keys()) | OrderedSet(dict2.keys())
-
-    # Iterate over all keys
-    for key in all_keys:
-        # Get the values from both dictionaries, or default if missing
-        value1 = dict1.get(key)
-        value2 = dict2.get(key)
-
-        yield (
-            key,
-            value1 if value1 is not None else d1_default,
-            value2 if value2 is not None else d2_default,
-        )
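And a small sketch of the deleted zip_dicts generator, again assuming the definition above and that OrderedSet's union preserves insertion order; note that a stored value of None is also replaced by the corresponding default, since the yield uses `is not None` checks.

```python
# Assumes zip_dicts from the diff above is in scope.
d1 = {"a": 1, "b": 2}
d2 = {"b": 20, "c": 30}
print(list(zip_dicts(d1, d2, d1_default=0, d2_default=0)))
# [('a', 1, 0), ('b', 2, 20), ('c', 0, 30)]
# Key order follows the OrderedSet union: d1's keys first, then keys only in d2.
```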