change guard_or impl for better perf and simplicity (#153674)

PR-time benchmarks have been showing regressions as we move to guard_or_false; the reason is that the previous implementation does not cache.
This new approach propagates the fallback value to eval and returns it, allowing eval to cache and reducing log spam and complexity.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/153674
Approved by: https://github.com/bobrenjc93
This commit is contained in:
Laith Sakka
2025-05-23 08:21:26 -07:00
committed by PyTorch MergeBot
parent 4b7abce6a4
commit 9e089bb5b6
3 changed files with 60 additions and 76 deletions

View File

@ -1,8 +1,8 @@
add_loop_eager,compile_time_instruction_count,2960000000,0.015
add_loop_eager,compile_time_instruction_count,2953000000,0.015
add_loop_eager_dynamic,compile_time_instruction_count,5827000000,0.025
add_loop_eager_dynamic,compile_time_instruction_count,5808000000,0.025
@ -10,7 +10,7 @@ add_loop_inductor,compile_time_instruction_count,29370000000,0.015
add_loop_inductor_dynamic_gpu,compile_time_instruction_count,44080000000,0.025
add_loop_inductor_dynamic_gpu,compile_time_instruction_count,44010000000,0.025
@ -22,11 +22,11 @@ basic_modules_ListOfLinears_eager,compile_time_instruction_count,939900000,0.015
basic_modules_ListOfLinears_inductor,compile_time_instruction_count,18240000000,0.015
basic_modules_ListOfLinears_inductor,compile_time_instruction_count,18140000000,0.015
basic_modules_ListOfLinears_inductor_gpu_force_shape_pad,compile_time_instruction_count,16340000000,0.015
basic_modules_ListOfLinears_inductor_gpu_force_shape_pad,compile_time_instruction_count,16220000000,0.015
@ -34,31 +34,15 @@ basic_modules_ListOfLinears_inductor_gpu,compile_time_instruction_count,10370000
basic_InlineMod_eager,compile_time_instruction_count,7101000000,0.015
update_hint_regression,compile_time_instruction_count,1681000000,0.02
update_hint_regression,compile_time_instruction_count,1683000000,0.02
float_args,compile_time_instruction_count,449800000,0.015
float_args,compile_time_instruction_count,455100000,0.015
mm_loop_inductor_gpu,compile_time_instruction_count,4407000000,0.015
mm_loop_inductor_dynamic_gpu,compile_time_instruction_count,7381000000,0.015
basic_NestedModule_eager,compile_time_instruction_count,8241000000,0.015
sum_floordiv_regression,compile_time_instruction_count,1000000000,0.015
sum_floordiv_regression,compile_time_instruction_count,998600000,0.015
@ -78,11 +62,11 @@ aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5981000000,0
aotdispatcher_partitioner_cpu,compile_time_instruction_count,8630000000,0.015
aotdispatcher_partitioner_cpu,compile_time_instruction_count,8585000000,0.015
aotdispatcher_partitioner_cpu2,compile_time_instruction_count,1890000000,0.015
aotdispatcher_partitioner_cpu2,compile_time_instruction_count,1900000000,0.015

1 add_loop_eager compile_time_instruction_count 2960000000 2953000000 0.015
2 add_loop_eager_dynamic compile_time_instruction_count 5827000000 5808000000 0.025
3 add_loop_inductor compile_time_instruction_count 29370000000 29370000000 0.015
4 add_loop_inductor_dynamic_gpu compile_time_instruction_count 44080000000 44010000000 0.025
5 add_loop_inductor_gpu compile_time_instruction_count 25900000000 25900000000 0.015
6 basic_modules_ListOfLinears_eager compile_time_instruction_count 939900000 939900000 0.015
7 basic_modules_ListOfLinears_inductor compile_time_instruction_count 18240000000 18140000000 0.015
8 basic_modules_ListOfLinears_inductor_gpu_force_shape_pad compile_time_instruction_count 16340000000 16220000000 0.015
10 basic_InlineMod_eager update_hint_regression compile_time_instruction_count 7101000000 1681000000 0.015 0.02
11 update_hint_regression float_args compile_time_instruction_count 1683000000 449800000 0.02 0.015
12 float_args sum_floordiv_regression compile_time_instruction_count 455100000 998600000 0.015
13 mm_loop_inductor_gpu symint_sum compile_time_instruction_count 4407000000 3252000000 0.015
14 mm_loop_inductor_dynamic_gpu symint_sum_loop compile_time_instruction_count 7381000000 4262000000 0.015
15 basic_NestedModule_eager aotdispatcher_inference_nosubclass_cpu compile_time_instruction_count 8241000000 2091000000 0.015
16 sum_floordiv_regression aotdispatcher_inference_subclass_cpu compile_time_instruction_count 1000000000 5981000000 0.015
22 1890000000
23 3818000000
24 10350000000
25
26
27
28
29
30
31
32
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
62
63
64
65
66
67
68
69
70
71
72