Mirror of https://github.com/pytorch/pytorch.git
[inductor] remove no_x_dim (#159810)
no_x_dim indicates that a reduction operates on a single row, so the data loaded for the reduction is 1-dimensional. It was introduced in https://github.com/pytorch/pytorch/pull/102444, which fixed poor performance in some reductions by switching to 1D tensors. That perf issue appears to no longer exist in current Triton versions: https://github.com/pytorch/pytorch/pull/118822 checked this, and the H100 benchmarks linked below confirm it. A further motivation for removing this behavior is that it enables larger loads, which we observe is necessary for good performance on certain shapes on Blackwell.

H100 inference benchmarks: https://hud.pytorch.org/benchmark/compilers?dashboard=torchinductor&startTime=Mon%2C%2004%20Aug%202025%2004%3A13%3A24%20GMT&stopTime=Mon%2C%2011%20Aug%202025%2004%3A13%3A24%20GMT&granularity=hour&mode=inference&dtype=bfloat16&deviceName=cuda%20(h100)&lBranch=gh/davidberard98/396/orig&lCommit=a6bcd4692fb39fa2fad260f290bff545d4425829&rBranch=main&rCommit=e96c7c4bb0f6aeae2ab3b6f040f7d67edbec199a

H100 training benchmarks: https://hud.pytorch.org/benchmark/compilers?dashboard=torchinductor&startTime=Mon%2C%2004%20Aug%202025%2004%3A13%3A24%20GMT&stopTime=Mon%2C%2011%20Aug%202025%2004%3A13%3A24%20GMT&granularity=hour&mode=training&dtype=amp&deviceName=cuda%20(h100)&lBranch=gh/davidberard98/396/orig&lCommit=a6bcd4692fb39fa2fad260f290bff545d4425829&rBranch=main&rCommit=e96c7c4bb0f6aeae2ab3b6f040f7d67edbec199a

Overall, the benchmarks show minimal change in performance.

Differential Revision: [D79599286](https://our.internmc.facebook.com/intern/diff/D79599286)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/159810
Approved by: https://github.com/ngimel, https://github.com/eellison
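To make the change concrete, below is a minimal hand-written sketch (not Inductor's actual generated code; the kernel names and the toy row-sum workload are invented for illustration) contrasting the 1D no_x_dim form with the 2D [XBLOCK, RBLOCK] form that remains after this PR:

```python
import torch
import triton
import triton.language as tl


@triton.jit
def sum_rows_no_x_dim(in_ptr, out_ptr, rnumel, RBLOCK: tl.constexpr):
    # no_x_dim style: one program per row; every index is 1-dimensional,
    # so each program loads a single [RBLOCK] vector.
    row = tl.program_id(0)
    rindex = tl.arange(0, RBLOCK)
    rmask = rindex < rnumel
    vals = tl.load(in_ptr + row * rnumel + rindex, mask=rmask, other=0.0)
    tl.store(out_ptr + row, tl.sum(vals, axis=0))


@triton.jit
def sum_rows_2d(in_ptr, out_ptr, xnumel, rnumel,
                XBLOCK: tl.constexpr, RBLOCK: tl.constexpr):
    # 2D style kept after this PR: indices form an [XBLOCK, RBLOCK] block,
    # so each program can load XBLOCK * RBLOCK elements at once.
    xindex = tl.program_id(0) * XBLOCK + tl.arange(0, XBLOCK)
    rindex = tl.arange(0, RBLOCK)
    xmask = xindex < xnumel
    rmask = rindex < rnumel
    offsets = xindex[:, None] * rnumel + rindex[None, :]
    mask = xmask[:, None] & rmask[None, :]
    vals = tl.load(in_ptr + offsets, mask=mask, other=0.0)
    tl.store(out_ptr + xindex, tl.sum(vals, axis=1), mask=xmask)


x = torch.randn(8, 512, device="cuda")
out_1d = torch.empty(8, device="cuda")
out_2d = torch.empty(8, device="cuda")
sum_rows_no_x_dim[(8,)](x, out_1d, 512, RBLOCK=512)
sum_rows_2d[(4,)](x, out_2d, 8, 512, XBLOCK=2, RBLOCK=512)
assert torch.allclose(out_1d, x.sum(dim=1))
assert torch.allclose(out_2d, out_1d)
```

With XBLOCK forced to 1 (the old no_x_dim behavior), each program loads at most RBLOCK elements; keeping the X dimension permits wider [XBLOCK, RBLOCK] loads, which is the benefit the commit message describes on Blackwell.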
Committed by: PyTorch MergeBot
Parent: 94b91a8763
Commit: 1f4057c11a
```diff
@@ -746,31 +746,6 @@ class CommonTemplate:
         # Check the code for multiple Rn_BLOCK's
         self._assert_reduction_ndims(code, 2)
 
-    def test_2d_reduction_no_x_dim(self):
-        """
-        Tests a 2D reduction without an "x" dimension.
-        """
-        # We need a size to get no x dim.
-        view = self._discontiguous_tensor((2, 346), self.device)
-
-        # Expect 1 block pointer for the input.
-        result, (code,) = self._run_and_compare(
-            torch.prod,
-            view,
-            expected_num_block_pointers=1,
-            expected_num_triton_kernels=1,
-            config_patches=tiled_reduction_config,
-        )
-
-        # Check that there's no X dimension in the signature.
-        (signature_line,) = (
-            line for line in code.splitlines() if line.startswith("def triton")
-        )
-        self.assertNotIn("BLOCK", signature_line)
-
-        # Check for 2 reduction dimensions in the body.
-        self._assert_reduction_ndims(code, 2)
-
     @parametrize(
         "size,expected_num_block_pointers,expected_num_triton_kernels,expect_fallback",
         [
```
```diff
@@ -196,18 +196,6 @@ class InductorChoices:
             features.reduction_numel, threshold
         )  # type: ignore[arg-types]
 
-    @staticmethod
-    def want_no_x_dim(features: SIMDKernelFeatures) -> bool:
-        """
-        Heuristic to decide if we should drop the X dimension from a persistent reduction kernel.
-        So the [XBLOCK, RBLOCK] block becomes a [RBLOCK] block and XBLOCK is forced to be always 1.
-        Strangely this is faster than a [1, RBLOCK] block in some cases.
-        """
-        return (
-            features.get_reduction_hint() == ReductionHint.INNER
-            and V.graph.sizevars.statically_known_geq(features.reduction_numel, 256)
-        )
-
     @staticmethod
     def reduction_split_factor(
         device: torch.device,
```
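For context on when the removed heuristic used to fire: only for inner reductions whose length was statically known to be at least 256. A toy standalone restatement follows (plain arguments replace SIMDKernelFeatures and the sizevar machinery; the enum is a stand-in for illustration, not the real import):

```python
from enum import Enum


class ReductionHint(Enum):
    # Stand-in mirroring the hint values Inductor attaches to reductions.
    INNER = 0
    OUTER = 1
    OUTER_TINY = 2
    DEFAULT = 3


def want_no_x_dim_heuristic(hint: ReductionHint, reduction_numel: int) -> bool:
    # Toy version of the removed InductorChoices.want_no_x_dim: drop the
    # X dimension only for long, contiguous ("inner") reductions.
    return hint == ReductionHint.INNER and reduction_numel >= 256


assert want_no_x_dim_heuristic(ReductionHint.INNER, 346)
assert not want_no_x_dim_heuristic(ReductionHint.INNER, 128)
assert not want_no_x_dim_heuristic(ReductionHint.OUTER, 4096)
```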
```diff
@@ -2001,14 +2001,12 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]):
         )
 
     def want_no_x_dim(self):
-        if (
+        return (
             self.persistent_reduction
             and len(self.numels) == self.num_reduction_dims + 1
-        ):
-            if self.fixed_config:
-                return self.fixed_config["XBLOCK"] == 1
-            return V.choices.want_no_x_dim(self.features)
-        return False
+            and self.fixed_config
+            and self.fixed_config["XBLOCK"] == 1
+        )
 
     @property
     def assert_function(self) -> str:
```
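The net effect of the TritonKernel change: dropping the X dimension is no longer a heuristic decision and now happens only when a fixed config explicitly pins XBLOCK == 1. A plain-Python restatement of the two versions (the parameters stand in for kernel state and are not real Inductor APIs):

```python
def want_no_x_dim_before(is_persistent_1d_reduction, fixed_config,
                         heuristic_says_yes):
    # Old behavior: with no pinned config, defer to the (now removed)
    # global heuristic in InductorChoices.
    if is_persistent_1d_reduction:
        if fixed_config:
            return fixed_config["XBLOCK"] == 1
        return heuristic_says_yes
    return False


def want_no_x_dim_after(is_persistent_1d_reduction, fixed_config):
    # New behavior: only an explicitly pinned XBLOCK == 1 triggers it.
    return bool(
        is_persistent_1d_reduction
        and fixed_config
        and fixed_config["XBLOCK"] == 1
    )
```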