remove fast_flush arguments (#135387)
I've removed them from upstream Triton in https://github.com/triton-lang/triton/pull/4485. It looks like most places in the code use the default value of `fast_flush=True` anyway, though there are two PRs from @pearu that use `False`. To my knowledge, there's no reason to use the `False` value.

Differential Revision: [D62325778](https://our.internmc.facebook.com/intern/diff/D62325778)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/135387
Approved by: https://github.com/nmacchioni, https://github.com/jansel
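For call sites, the migration is mechanical: drop the keyword argument. A minimal sketch of a call site before and after the removal (the matmul workload below is illustrative, not taken from this PR):

```python
import torch
import triton.testing

def test_func():
    # Illustrative GPU workload; any callable that launches kernels works.
    a = torch.randn(1024, 1024, device="cuda", dtype=torch.float16)
    b = torch.randn(1024, 1024, device="cuda", dtype=torch.float16)
    return a @ b

# Before triton-lang/triton#4485, do_bench accepted the keyword:
#   ms = triton.testing.do_bench(test_func, warmup=500, rep=100, fast_flush=False)
# After the removal, passing fast_flush raises TypeError, so call sites become:
ms = triton.testing.do_bench(test_func, warmup=500, rep=100)
print(f"mean latency: {ms:.3f} ms")
```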
@@ -28,9 +28,7 @@ def create_blocked_tensor(B, M, N, blocksize, sparsity, dtype, device):
 
 
 def _test_worker(test_func):
-    ms, ms_min, ms_max = benchmarker.benchmark_gpu(
-        test_func, warmup=500, rep=100, fast_flush=False
-    )
+    ms, ms_min, ms_max = benchmarker.benchmark_gpu(test_func, warmup=500, rep=100)
 
     tflops = 2 * m * k * n * 1e-12 / (ms * 1e-3)
     return ms, tflops
@@ -309,7 +309,7 @@ class MultiKernelCall:
             return inner
 
         return [
-            benchmarker.benchmark_gpu(wrap_fn(kernel), rep=40, fast_flush=True)
+            benchmarker.benchmark_gpu(wrap_fn(kernel), rep=40)
             for kernel in self.kernels
         ]
 
@@ -2503,7 +2503,7 @@ class TritonKernel(SIMDKernel):
 
             result.writeline("args = get_args()")
             result.writeline(
-                "ms = benchmarker.benchmark_gpu(lambda: call(args), rep=40, fast_flush=True)"
+                "ms = benchmarker.benchmark_gpu(lambda: call(args), rep=40)"
             )
             result.writeline(f"num_gb = {num_gb}")
             result.writeline("gb_per_s = num_gb / (ms / 1e3)")
@@ -994,7 +994,7 @@ class ComboKernel(Kernel):
 
             result.writeline("args = get_args()")
             result.writeline(
-                "ms = benchmarker.benchmark_gpu(lambda: call(args), rep=40, fast_flush=True)"
+                "ms = benchmarker.benchmark_gpu(lambda: call(args), rep=40)"
             )
             result.writeline(f"num_gb = {num_gb}")
             result.writeline("gb_per_s = num_gb / (ms / 1e3)")
@@ -672,7 +672,7 @@ class CachingAutotuner(KernelInterface):
 
                 return do_bench_using_profiling(kernel_call, warmup=10, rep=40)
 
-        return benchmarker.benchmark_gpu(kernel_call, rep=40, fast_flush=True)
+        return benchmarker.benchmark_gpu(kernel_call, rep=40)
 
     def clone_args(self, *args, **kwargs) -> Tuple[List[Any], Dict[str, Any]]:
         from ..compile_fx import clone_preserve_strides
@@ -120,9 +120,7 @@ def benchmark_all_kernels(benchmark_name, benchmark_all_configs):
                     f" {get_info_str(ms, launcher.n_regs, launcher.n_spills, launcher.shared)} @ {launcher.config}"
                 )
         else:
-            ms = benchmarker.benchmark_gpu(
-                lambda: kernel_mod.call(args), rep=40, fast_flush=True
-            )
+            ms = benchmarker.benchmark_gpu(lambda: kernel_mod.call(args), rep=40)
             assert (
                 len(triton_kernel.launchers) == 1
             ), "Autotuner should have selected the best config"
@@ -507,9 +507,7 @@ def optimize_scatter_mm(
     def test_func():
         return bsr_scatter_mm(bsr, dense, indices_data=indices_data)
 
-    ms_min = triton.testing.do_bench(
-        test_func, warmup=500, rep=100, fast_flush=False
-    )
+    ms_min = triton.testing.do_bench(test_func, warmup=500, rep=100)
 
     return ms_min
 
@@ -663,7 +661,7 @@ def tune_bsr_dense_addmm(
             input, bsr, dense, beta=beta, alpha=alpha, meta=meta, out=out
         )
 
-    return triton.testing.do_bench(test_func, warmup=500, rep=100, fast_flush=False)
+    return triton.testing.do_bench(test_func, warmup=500, rep=100)
 
     # The step function that increments a specified meta parameter:
     def step_meta_parameter(name, value, direction, meta, M=M, N=N, K=K, BM=BM, BK=BK):
@@ -866,9 +864,7 @@ def main(op="scatter_mm", force=False, dtype=torch.float16, verbose=True):
         else:
             raise NotImplementedError(op)
 
-        ms_min = triton.testing.do_bench(
-            test_func, warmup=500, rep=100, fast_flush=False
-        )
+        ms_min = triton.testing.do_bench(test_func, warmup=500, rep=100)
 
         return ms_min
 
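For downstream code that must run against both older and newer Triton releases, one option is to pass `fast_flush` only when the installed `do_bench` still accepts it. A minimal sketch under that assumption; the helper name `do_bench_compat` is hypothetical and not part of this PR:

```python
import inspect
import triton.testing

def do_bench_compat(fn, warmup=500, rep=100):
    # Hypothetical shim: forward fast_flush only on Triton versions
    # whose do_bench signature still includes it.
    kwargs = {"warmup": warmup, "rep": rep}
    if "fast_flush" in inspect.signature(triton.testing.do_bench).parameters:
        kwargs["fast_flush"] = True  # the old default value
    return triton.testing.do_bench(fn, **kwargs)
```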