diff --git a/pyrefly.toml b/pyrefly.toml
index 88054d605258..ad74e4df084c 100644
--- a/pyrefly.toml
+++ b/pyrefly.toml
@@ -22,10 +22,8 @@ project-includes = [
 project-excludes = [
     # ==== below will be enabled directory by directory ====
     # ==== to test Pyrefly on a specific directory, simply comment it out ====
+    "torch/_inductor/runtime",
     "torch/_inductor/codegen/triton.py",
-    "torch/_inductor/runtime/triton_helpers.py",
-    "torch/_inductor/runtime/triton_heuristics.py",
-    "torch/_inductor/runtime/halide_helpers.py",
     # formatting issues, will turn on after adjusting where suppressions can be
     # in import statements
     "torch/linalg/__init__.py",
diff --git a/torch/_inductor/codegen/common.py b/torch/_inductor/codegen/common.py
index 743baec01dfa..36ded3aea2fe 100644
--- a/torch/_inductor/codegen/common.py
+++ b/torch/_inductor/codegen/common.py
@@ -1739,7 +1739,6 @@ class KernelArgs:
         for outer, inner in chain(
             # pyrefly: ignore # bad-argument-type
             self.input_buffers.items(),
-            # pyrefly: ignore # bad-argument-type
             self.output_buffers.items(),
         ):
             if outer in self.inplace_buffers or isinstance(inner, RemovedArg):
diff --git a/torch/_inductor/codegen/cpp_gemm_template.py b/torch/_inductor/codegen/cpp_gemm_template.py
index cb17b5a7deb0..9b26105bab10 100644
--- a/torch/_inductor/codegen/cpp_gemm_template.py
+++ b/torch/_inductor/codegen/cpp_gemm_template.py
@@ -1480,7 +1480,6 @@ class CppGemmTemplate(CppTemplate):
             gemm_output_buffer = ir.Buffer(
                 # pyrefly: ignore # missing-attribute
                 name=gemm_output_name,
-                # pyrefly: ignore # missing-attribute
                 layout=template_buffer.layout,
             )
             current_input_buffer = gemm_output_buffer
@@ -1504,7 +1503,6 @@ class CppGemmTemplate(CppTemplate):
             current_input_buffer = ir.Buffer(
                 # pyrefly: ignore # missing-attribute
                 name=buffer_name,
-                # pyrefly: ignore # missing-attribute
                 layout=template_buffer.layout,
             )
 
diff --git a/torch/_inductor/codegen/cpp_wrapper_gpu.py b/torch/_inductor/codegen/cpp_wrapper_gpu.py
index dd4a3a984d34..d1ddc7e1cd40 100644
--- a/torch/_inductor/codegen/cpp_wrapper_gpu.py
+++ b/torch/_inductor/codegen/cpp_wrapper_gpu.py
@@ -824,7 +824,6 @@ class CppWrapperGpu(CppWrapperCpu):
         call_args, arg_types = self.prepare_triton_wrapper_args(
             # pyrefly: ignore # bad-argument-type
             call_args,
-            # pyrefly: ignore # bad-argument-type
             arg_types,
         )
         wrapper_name = f"call_{kernel_name}"
diff --git a/torch/_inductor/codegen/mps.py b/torch/_inductor/codegen/mps.py
index fb3939531b71..a74506d7247a 100644
--- a/torch/_inductor/codegen/mps.py
+++ b/torch/_inductor/codegen/mps.py
@@ -683,7 +683,6 @@ class MetalKernel(SIMDKernel):
             # pyrefly: ignore # missing-argument
             t
             for t in self.range_tree_nodes.values()
-            # pyrefly: ignore # missing-argument
             if t.is_reduction
         )
         cmp_op = ">" if reduction_type == "argmax" else "<"
@@ -866,7 +865,6 @@ class MetalKernel(SIMDKernel):
             # pyrefly: ignore # missing-argument
             t.numel
             for t in self.range_trees
-            # pyrefly: ignore # missing-argument
             if t.is_reduction
         )
         # If using dynamic shapes, set the threadgroup size to be the
diff --git a/torch/_inductor/codegen/simd.py b/torch/_inductor/codegen/simd.py
index 79d0b603220a..e2294f05ddca 100644
--- a/torch/_inductor/codegen/simd.py
+++ b/torch/_inductor/codegen/simd.py
@@ -968,7 +968,6 @@ class SIMDKernel(Kernel[CSEVariableType], Generic[CSEVariableType]):
             # pyrefly: ignore # missing-argument
             t
             for t in self.range_trees
-            # pyrefly: ignore # missing-argument
             if not t.is_reduction or self.inside_reduction
         ]
 
diff --git a/torch/_inductor/codegen/wrapper_fxir.py b/torch/_inductor/codegen/wrapper_fxir.py
index e123f9592770..72c8e0335508 100644
--- a/torch/_inductor/codegen/wrapper_fxir.py
+++ b/torch/_inductor/codegen/wrapper_fxir.py
@@ -1004,7 +1004,6 @@ class FxConverter:
             # pyrefly: ignore # missing-attribute
             call_kwargs[key]
             for key in signature
-            # pyrefly: ignore # missing-attribute
             if key not in cfg.kwargs
         ]
 
diff --git a/torch/_inductor/runtime/autotune_cache.py b/torch/_inductor/runtime/autotune_cache.py
index 63d7a52ff7d7..3c55a9cd1b08 100644
--- a/torch/_inductor/runtime/autotune_cache.py
+++ b/torch/_inductor/runtime/autotune_cache.py
@@ -275,11 +275,8 @@ class AutotuneCache:
         triton_cache_hash: str | None = None,
     ) -> None:
         data = {
-            # pyrefly: ignore # missing-attribute
             **config.kwargs,
-            # pyrefly: ignore # missing-attribute
             "num_warps": config.num_warps,
-            # pyrefly: ignore # missing-attribute
             "num_stages": config.num_stages,
             "configs_hash": self.configs_hash,
             "found_by_coordesc": found_by_coordesc,
@@ -573,20 +570,15 @@ def _load_cached_autotuning(
         )
 
         # Create the triton_config with the appropriate arguments
-        # pyrefly: ignore # bad-argument-count
         triton_config = Config(best_config, **config_args)
-        # pyrefly: ignore # missing-attribute
         triton_config.found_by_coordesc = True
         return triton_config
 
     matching_configs = [
         cfg
         for cfg in configs
-        # pyrefly: ignore # missing-attribute
         if all(val == best_config.get(key) for key, val in cfg.kwargs.items())
-        # pyrefly: ignore # missing-attribute
         and cfg.num_warps == best_config.get("num_warps")
-        # pyrefly: ignore # missing-attribute
         and cfg.num_stages == best_config.get("num_stages")
     ]
     if len(matching_configs) != 1:
diff --git a/torch/_inductor/runtime/benchmarking.py b/torch/_inductor/runtime/benchmarking.py
index ee504b1a0575..698484658ddd 100644
--- a/torch/_inductor/runtime/benchmarking.py
+++ b/torch/_inductor/runtime/benchmarking.py
@@ -123,7 +123,6 @@ class Benchmarker:
         - The runtime of `fn(*fn_args, **fn_kwargs)`, in milliseconds.
         """
         inferred_device = None
-        # pyrefly: ignore # bad-assignment
         for arg_or_kwarg in chain(fn_args, fn_kwargs.values()):
             if not isinstance(arg_or_kwarg, torch.Tensor):
                 continue
@@ -197,7 +196,6 @@ class TritonBenchmarker(Benchmarker):
 
     @may_distort_benchmarking_result
     @time_and_count
-    # pyrefly: ignore # bad-override
     def benchmark_gpu(
         self: Self,
         _callable: Callable[[], Any],
diff --git a/torch/_inductor/runtime/caching/implementations.py b/torch/_inductor/runtime/caching/implementations.py
index 8292b957f562..abc113caae93 100644
--- a/torch/_inductor/runtime/caching/implementations.py
+++ b/torch/_inductor/runtime/caching/implementations.py
@@ -190,7 +190,6 @@ class _OnDiskCacheImpl(_CacheImpl):
                 Defaults to empty string if not specified.
""" self._cache_dir: Path = self._base_dir / (sub_dir or "") - # pyrefly: ignore # bad-assignment self._flock: FileLock = FileLock(str(self._cache_dir / "dir.lock")) @property diff --git a/torch/_inductor/runtime/coordinate_descent_tuner.py b/torch/_inductor/runtime/coordinate_descent_tuner.py index 30e0acfca4fe..faa2b06bcaf1 100644 --- a/torch/_inductor/runtime/coordinate_descent_tuner.py +++ b/torch/_inductor/runtime/coordinate_descent_tuner.py @@ -186,7 +186,6 @@ class CoordescTuner: def check_all_tuning_directions( self, - # pyrefly: ignore # missing-attribute func: Callable[["triton.Config"], float], best_config, best_timing, @@ -256,12 +255,10 @@ class CoordescTuner: def autotune( self, - func: Callable[ - ["triton.Config"], float # pyrefly: ignore # missing-attribute - ], - baseline_config: "triton.Config", # pyrefly: ignore # missing-attribute - baseline_timing: float | None = None, # pyrefly: ignore # missing-attribute - ) -> "triton.Config": # pyrefly: ignore # missing-attribute + func: Callable[["triton.Config"], float], + baseline_config: "triton.Config", + baseline_timing: float | None = None, + ) -> "triton.Config": if baseline_timing is None: baseline_timing = self.call_func(func, baseline_config) diff --git a/torch/_inductor/runtime/hints.py b/torch/_inductor/runtime/hints.py index 71ba05011e41..1cff04d04079 100644 --- a/torch/_inductor/runtime/hints.py +++ b/torch/_inductor/runtime/hints.py @@ -88,13 +88,11 @@ if has_triton_package(): divisible_by_16=None, equal_to_1=None, ): - # pyrefly: ignore # not-iterable return {(x,): [["tt.divisibility", 16]] for x in divisible_by_16} else: # Define a namedtuple as a fallback when AttrsDescriptor is not available AttrsDescriptorWrapper = collections.namedtuple( # type: ignore[no-redef, name-match] - # pyrefly: ignore # invalid-argument "AttrsDescriptor", ["divisible_by_16", "equal_to_1"], defaults=[(), ()], diff --git a/torch/_inductor/runtime/runtime_utils.py b/torch/_inductor/runtime/runtime_utils.py index 30087d95663a..21cd5987f8f4 100644 --- a/torch/_inductor/runtime/runtime_utils.py +++ b/torch/_inductor/runtime/runtime_utils.py @@ -68,11 +68,8 @@ def triton_config_to_hashable(cfg: Config) -> Hashable: Convert triton config to a tuple that can uniquely identify it. We can use the return value as a dictionary key. 
""" - # pyrefly: ignore # missing-attribute items = sorted(cfg.kwargs.items()) - # pyrefly: ignore # missing-attribute items.append(("num_warps", cfg.num_warps)) - # pyrefly: ignore # missing-attribute items.append(("num_stages", cfg.num_stages)) return tuple(items) @@ -106,7 +103,6 @@ def get_max_y_grid() -> int: try: - # pyrefly: ignore # import-error import colorama HAS_COLORAMA = True @@ -118,7 +114,6 @@ except ModuleNotFoundError: if HAS_COLORAMA: def _color_text(msg: str, color: str) -> str: - # pyrefly: ignore # missing-attribute return getattr(colorama.Fore, color.upper()) + msg + colorama.Fore.RESET else: diff --git a/torch/_inductor/runtime/static_cuda_launcher.py b/torch/_inductor/runtime/static_cuda_launcher.py index e7d4705740e5..a5e511052b28 100644 --- a/torch/_inductor/runtime/static_cuda_launcher.py +++ b/torch/_inductor/runtime/static_cuda_launcher.py @@ -34,29 +34,21 @@ class StaticallyLaunchedCudaKernel: """ def __init__(self, kernel: CompiledKernel) -> None: - # pyrefly: ignore # missing-attribute self.name = kernel.src.fn.__name__ - # pyrefly: ignore # missing-attribute self.cubin_raw = kernel.asm.get("cubin", None) - # pyrefly: ignore # missing-attribute self.cubin_path = kernel._cubin_path # Used by torch.compile to filter constants in older triton versions - # pyrefly: ignore # missing-attribute self.arg_names = kernel.src.fn.arg_names # Const exprs that are declared by the triton kernel directly # Used to generate the kernel launcher's def args - # pyrefly: ignore # missing-attribute self.declared_constexprs = kernel.src.fn.constexprs - # pyrefly: ignore # missing-attribute self.hash = kernel.hash if triton_knobs is None: - # pyrefly: ignore # missing-attribute launch_enter = kernel.__class__.launch_enter_hook - # pyrefly: ignore # missing-attribute launch_exit = kernel.__class__.launch_exit_hook else: launch_enter = triton_knobs.runtime.launch_enter_hook @@ -78,15 +70,12 @@ class StaticallyLaunchedCudaKernel: raise NotImplementedError( "We don't support launch enter or launch exit hooks" ) - # pyrefly: ignore # missing-attribute self.num_warps = kernel.metadata.num_warps self.shared = ( - # pyrefly: ignore # missing-attribute kernel.shared if hasattr(kernel, "shared") else kernel.metadata.shared ) def needs_scratch_arg(scratch_name: str, param_name: str) -> bool: - # pyrefly: ignore # missing-attribute if hasattr(kernel.metadata, param_name): if getattr(kernel.metadata, param_name) > 0: raise NotImplementedError( @@ -102,7 +91,6 @@ class StaticallyLaunchedCudaKernel: # same situation for profile scratch - triton-lang/triton#7258 self.has_profile_scratch = needs_scratch_arg("Profile", "profile_scratch_size") - # pyrefly: ignore # missing-attribute self.arg_tys = self.arg_ty_from_signature(kernel.src) self.function: int | None = None # Loaded by load_kernel(on the parent process) num_ctas = 1 @@ -182,7 +170,6 @@ class StaticallyLaunchedCudaKernel: def arg_ty_from_signature(self, src: ASTSource) -> str: def index_key(i: Any) -> int: if isinstance(i, str): - # pyrefly: ignore # missing-attribute return src.fn.arg_names.index(i) elif isinstance(i, tuple): # In triton 3.3, src.fn.constants has tuples as a key @@ -190,7 +177,6 @@ class StaticallyLaunchedCudaKernel: else: return i - # pyrefly: ignore # missing-attribute signature = {index_key(key): value for key, value in src.signature.items()} # Triton uses these as the main way to filter out constants passed to their cubin constants = [index_key(key) for key in getattr(src, "constants", dict())] @@ -212,7 +198,6 @@ 
class StaticallyLaunchedCudaKernel: if ty == "constexpr" or i in constants: pass else: - # pyrefly: ignore # bad-argument-type params.append(self.extract_type(ty)) return "".join(params) @@ -250,7 +235,6 @@ class StaticallyLaunchedCudaKernel: if has_scratch: arg_tys = arg_tys + "O" args = (*args, None) - # pyrefly: ignore # bad-argument-type assert len(args) == len(arg_tys) # TODO: can handle grid functions here or in C++, so @@ -263,7 +247,6 @@ class StaticallyLaunchedCudaKernel: self.num_warps, self.shared, arg_tys, - # pyrefly: ignore # bad-argument-type args, stream, ) diff --git a/torch/fx/experimental/proxy_tensor.py b/torch/fx/experimental/proxy_tensor.py index 28a60bafcac8..805d59008e02 100644 --- a/torch/fx/experimental/proxy_tensor.py +++ b/torch/fx/experimental/proxy_tensor.py @@ -421,7 +421,6 @@ def get_proxy_slot( else: # Attempt to build it from first principles. _build_proxy_for_sym_expr(tracer, obj.node.expr, obj) - # pyrefly: ignore # no-matching-overload value = tracker.get(obj) if value is None: