diff --git a/mypy.ini b/mypy.ini index 4895da84b776..c13b026e2ba1 100644 --- a/mypy.ini +++ b/mypy.ini @@ -13,6 +13,7 @@ show_column_numbers = True check_untyped_defs = True follow_imports = normal local_partial_types = True +enable_error_code = possibly-undefined # do not reenable this: # https://github.com/pytorch/pytorch/pull/60006#issuecomment-866130657 diff --git a/test/typing/pass/creation_ops.py b/test/typing/pass/creation_ops.py index aecef48e7e24..c524d56f1971 100644 --- a/test/typing/pass/creation_ops.py +++ b/test/typing/pass/creation_ops.py @@ -1,3 +1,4 @@ +# mypy: disable-error-code="possibly-undefined" # flake8: noqa import torch from torch.testing._internal.common_utils import TEST_NUMPY diff --git a/test/typing/reveal/tensor_constructors.py b/test/typing/reveal/tensor_constructors.py index c0abbe3169de..5f6404224361 100644 --- a/test/typing/reveal/tensor_constructors.py +++ b/test/typing/reveal/tensor_constructors.py @@ -1,3 +1,4 @@ +# mypy: disable-error-code="possibly-undefined" # flake8: noqa import torch from torch.testing._internal.common_utils import TEST_NUMPY diff --git a/torch/__init__.py b/torch/__init__.py index 0ff118ed04a2..644c6817986a 100644 --- a/torch/__init__.py +++ b/torch/__init__.py @@ -515,7 +515,7 @@ for name in ("sqrt", "cos", "cosh", "sin", "sinh", "tan", "tanh", "asin", "acos" sym_sqrt = current_module._sym_sqrt __all__.append("sym_sqrt") -del fn, name, sym_name, current_module +del fn, name, sym_name, current_module # type: ignore[possibly-undefined] def sym_ite(b, t, f): diff --git a/torch/_decomp/decompositions.py b/torch/_decomp/decompositions.py index 447f312a1b42..54a6fe8443c4 100644 --- a/torch/_decomp/decompositions.py +++ b/torch/_decomp/decompositions.py @@ -2832,7 +2832,7 @@ def _rnn_helper( final_hiddens.append(bwd_hidden) if bidirectional: - input = torch.cat([fwd_inp, bwd_inp], fwd_inp.dim() - 1) + input = torch.cat([fwd_inp, bwd_inp], fwd_inp.dim() - 1) # type: ignore[possibly-undefined] else: input = fwd_inp diff --git a/torch/_dynamo/convert_frame.py b/torch/_dynamo/convert_frame.py index 308203210711..3571ec0f440b 100644 --- a/torch/_dynamo/convert_frame.py +++ b/torch/_dynamo/convert_frame.py @@ -163,7 +163,7 @@ def preserve_global_state(fn): random.setstate(py_rng_state) torch.random.set_rng_state(torch_rng_state) if torch.cuda.is_available(): - torch.cuda.set_rng_state(cuda_rng_state) + torch.cuda.set_rng_state(cuda_rng_state) # type: ignore[possibly-undefined] torch.fx.graph_module._forward_from_src = prior_fwd_from_src assert ( guards.check() @@ -568,7 +568,7 @@ def _compile( code.co_name, code.co_filename, code.co_firstlineno, - out_code, + out_code, # type: ignore[possibly-undefined] ) for hook in _bytecode_hooks.values(): diff --git a/torch/_dynamo/debug_utils.py b/torch/_dynamo/debug_utils.py index b6d42c2a375c..1838c0b30cef 100644 --- a/torch/_dynamo/debug_utils.py +++ b/torch/_dynamo/debug_utils.py @@ -46,7 +46,7 @@ if use_buck: "//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu", "//deeplearning/fbgemm/fbgemm_gpu:sparse_ops", ] - cur_target = libfb.py.build_info.BuildInfo.get_build_rule().replace("fbcode:", "//") + cur_target = libfb.py.build_info.BuildInfo.get_build_rule().replace("fbcode:", "//") # type: ignore[possibly-undefined] extra_imports = "\n".join([f'torch.ops.load_library("{x}")' for x in extra_deps]) diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py index 5fd5ee99c159..8085f0b988ac 100644 --- a/torch/_dynamo/eval_frame.py +++ b/torch/_dynamo/eval_frame.py @@ -1430,7 +1430,7 @@ def 
export( example_fake_inputs, graph_captured_input, graph_captured_result, - result_traced, + result_traced, # type: ignore[possibly-undefined] flat_args_dynamic_dims, ) # Store constraints and inputs as metadata for user passes, e.g. turn constraints to runtime check diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py index b64ac7e2ec8f..fdfcd9fb298d 100644 --- a/torch/_dynamo/symbolic_convert.py +++ b/torch/_dynamo/symbolic_convert.py @@ -1115,7 +1115,7 @@ class InstructionTranslatorBase(Checkpointable[InstructionTranslatorGraphState]) tos = self.pop() _ = self.pop() if preserve_tos: - self.push(tos) + self.push(tos) # type: ignore[possibly-undefined] def FOR_ITER(self, inst): it = self.pop().realize() diff --git a/torch/_dynamo/test_minifier_common.py b/torch/_dynamo/test_minifier_common.py index 477018eab426..d12e5a92315a 100644 --- a/torch/_dynamo/test_minifier_common.py +++ b/torch/_dynamo/test_minifier_common.py @@ -118,7 +118,7 @@ torch._inductor.config.{"cpp" if device == "cpu" else "triton"}.inject_relu_bug_ finally: log.removeHandler(log_handler) if cwd is not None: - os.chdir(prev_cwd) + os.chdir(prev_cwd) # type: ignore[possibly-undefined] # Make sure we don't leave buggy compiled frames lying # around torch._dynamo.reset() diff --git a/torch/_dynamo/utils.py b/torch/_dynamo/utils.py index 6655ad735493..10b3c92520c6 100644 --- a/torch/_dynamo/utils.py +++ b/torch/_dynamo/utils.py @@ -773,7 +773,7 @@ def preserve_rng_state(): with torch.utils._python_dispatch._disable_current_modes(): torch.random.set_rng_state(rng_state) if torch.cuda.is_available(): - torch.cuda.set_rng_state(cuda_rng_state) + torch.cuda.set_rng_state(cuda_rng_state) # type: ignore[possibly-undefined] def is_jit_model(model0): @@ -892,7 +892,7 @@ def timed(model, example_inputs, times=1): result = model(*example_inputs) synchronize() t1 = time.perf_counter() - return result, t1 - t0 + return result, t1 - t0 # type: ignore[possibly-undefined] def check_is_cuda(gm, example_inputs): diff --git a/torch/_functorch/autograd_function.py b/torch/_functorch/autograd_function.py index d1cf408c73c5..0a4fbf81c725 100644 --- a/torch/_functorch/autograd_function.py +++ b/torch/_functorch/autograd_function.py @@ -199,7 +199,7 @@ def wrap_outputs_maintaining_identity( result.append(unwrapped_input_to_orig_input[id(output)]) continue if out_dims_specified: - result.append(wrap_fn(output, flat_out_dims[i])) # type: ignore[index] + result.append(wrap_fn(output, flat_out_dims[i])) # type: ignore[possibly-undefined, index] else: result.append(wrap_fn(output)) diff --git a/torch/_higher_order_ops/triton_kernel_wrap.py b/torch/_higher_order_ops/triton_kernel_wrap.py index 2af982949ecf..cadb783e063d 100644 --- a/torch/_higher_order_ops/triton_kernel_wrap.py +++ b/torch/_higher_order_ops/triton_kernel_wrap.py @@ -163,7 +163,7 @@ def parse_ttir(ttir, kwargs): return None try: - import lark + import lark # type: ignore[import-not-found] from lark import Lark, Transformer, v_args except ModuleNotFoundError: warnings.warn( diff --git a/torch/_inductor/autotune_process.py b/torch/_inductor/autotune_process.py index 4b89a9ce9283..a05e905254b3 100644 --- a/torch/_inductor/autotune_process.py +++ b/torch/_inductor/autotune_process.py @@ -440,25 +440,25 @@ class BenchmarkRequest: output_tensor = self.output_tensor_meta.to_tensor() if debug: - create_tensor_elapse = time.time() - start_ts + create_tensor_elapse = time.time() - start_ts # type: ignore[possibly-undefined] start_ts = time.time() fn = 
self.make_run_fn(*input_tensors, output_tensor=output_tensor) if debug: - load_elapse = time.time() - start_ts + load_elapse = time.time() - start_ts # type: ignore[possibly-undefined] start_ts = time.time() out = do_bench(fn) torch.cuda.synchronize() # shake out any CUDA errors if debug: - bench_elapse = time.time() - start_ts + bench_elapse = time.time() - start_ts # type: ignore[possibly-undefined] log.debug( "InChildProcess %s: load %f, create tensor %f, bench %f", str(self), - load_elapse, - create_tensor_elapse, + load_elapse, # type: ignore[possibly-undefined] + create_tensor_elapse, # type: ignore[possibly-undefined] bench_elapse, ) self.cleanup_run_fn() diff --git a/torch/_inductor/codegen/cuda/cutlass_epilogue_gen.py b/torch/_inductor/codegen/cuda/cutlass_epilogue_gen.py index 0eeb9abe1caf..1e4828aab466 100644 --- a/torch/_inductor/codegen/cuda/cutlass_epilogue_gen.py +++ b/torch/_inductor/codegen/cuda/cutlass_epilogue_gen.py @@ -99,7 +99,7 @@ class CutlassEVTEpilogueTypeFormatter: result = pnode.inner_fn(index) # each epilogue node results in a single "using" statement and may refer to the previous steps by name formatter.aliases[node.name] = result - res = formatter.getvalue(result) + res = formatter.getvalue(result) # type: ignore[possibly-undefined] if _MAGIC_SYMPY_ERROR_STRING in res: raise CUTLASSEVTOpNotImplementedError( "sympy / indexing expressions not yet supported in EVT fusion" @@ -266,7 +266,7 @@ class CutlassEVTEpilogueArgumentFormatter: if node.name is not None: formatter.aliases[node.name] = result - res: str = formatter.getvalue(result) + res: str = formatter.getvalue(result) # type: ignore[possibly-undefined] if _MAGIC_SYMPY_ERROR_STRING in res: raise CUTLASSEVTOpNotImplementedError( "sympy / indexing expressions not yet supported in EVT fusion" diff --git a/torch/_inductor/codegen/wrapper.py b/torch/_inductor/codegen/wrapper.py index eea5c1694490..fc70ebbd771c 100644 --- a/torch/_inductor/codegen/wrapper.py +++ b/torch/_inductor/codegen/wrapper.py @@ -155,7 +155,7 @@ def get_cpp_op_schema(kernel: torch._ops.OpOverload) -> str: cpp_return_value = f"std::tuple<{tuple_returns}>" cpp_arg_type = [f"{convert_arg_type(arg)} {arg.name}" for arg in args] - return f"{cpp_return_value}({', '.join(cpp_arg_type)})" + return f"{cpp_return_value}({', '.join(cpp_arg_type)})" # type: ignore[possibly-undefined] # TODO: Move to a well known place diff --git a/torch/_inductor/comm_analysis.py b/torch/_inductor/comm_analysis.py index 483ccfe2a1c8..f1f555ff45ac 100644 --- a/torch/_inductor/comm_analysis.py +++ b/torch/_inductor/comm_analysis.py @@ -209,7 +209,7 @@ def estimate_nccl_collective_runtime(snode: "BaseSchedulerNode") -> float: nsteps = nRanks - 1 # Convert bus BW to algorithm BW (tensor bytes / algoBW = actual execution time) - ratio = (1.0 * nRanks) / nsteps + ratio = (1.0 * nRanks) / nsteps # type: ignore[possibly-undefined] bandwidth = busBw * ratio # Convert GB/s to GB/ns bandwidth_GB_per_ns = bandwidth / 1e9 @@ -236,7 +236,7 @@ def estimate_nccl_collective_runtime(snode: "BaseSchedulerNode") -> float: if nNodes > 1: netOverhead = 1.0 # getNetOverhead(comm); intraLat = max(intraLat, netOverhead) - latency += (nsteps - nInterSteps) * intraLat + nInterSteps * interLat + latency += (nsteps - nInterSteps) * intraLat + nInterSteps * interLat # type: ignore[possibly-undefined] # Convert us to ns latency_ns = latency * 1e3 diff --git a/torch/_inductor/fx_passes/group_batch_fusion.py b/torch/_inductor/fx_passes/group_batch_fusion.py index ff80e82ba741..bb7fd2cb39a4 100644 --- 
a/torch/_inductor/fx_passes/group_batch_fusion.py +++ b/torch/_inductor/fx_passes/group_batch_fusion.py @@ -170,9 +170,9 @@ class PostGradBatchLinearFusion(BatchFusion): input, weight = node.args bias = None batch_nodes.append(node) - batch_inputs.append(input) - batch_weights.append(weight) - batch_biases.append(bias) + batch_inputs.append(input) # type: ignore[possibly-undefined] + batch_weights.append(weight) # type: ignore[possibly-undefined] + batch_biases.append(bias) # type: ignore[possibly-undefined] with graph.inserting_before(subset[-1]): fused_inputs = decompose_stack(graph, batch_inputs) @@ -191,7 +191,7 @@ class PostGradBatchLinearFusion(BatchFusion): new_bias_add = graph.call_function( aten.add, args=((batch_biases[i], new_mm)) ) - new_mm_cont = new_bias_add if has_bias else new_mm + new_mm_cont = new_bias_add if has_bias else new_mm # type: ignore[possibly-undefined] original_mm.replace_all_uses_with(new_mm_cont) new_mm_cont.meta.update(original_mm.meta) graph.erase_node(original_mm) diff --git a/torch/_inductor/fx_passes/mkldnn_fusion.py b/torch/_inductor/fx_passes/mkldnn_fusion.py index 3f30d579e4e6..270d5334b518 100644 --- a/torch/_inductor/fx_passes/mkldnn_fusion.py +++ b/torch/_inductor/fx_passes/mkldnn_fusion.py @@ -283,7 +283,7 @@ if torch._C._has_mkldnn: L[aten.mul](out, negative_slope), ) if lowp_dtype: - out = L[prims.convert_element_type.default](out, dtype=dtype2) + out = L[prims.convert_element_type.default](out, dtype=dtype2) # type: ignore[possibly-undefined] return out return fn @@ -324,7 +324,7 @@ if torch._C._has_mkldnn: out = L[prims.convert_element_type.default](out, dtype=torch.float) out = L[aten.clamp_max](L[aten.clamp_min](out, min_value), max_value) if lowp_dtype: - out = L[prims.convert_element_type.default](out, dtype=dtype2) + out = L[prims.convert_element_type.default](out, dtype=dtype2) # type: ignore[possibly-undefined] return out return fn diff --git a/torch/_inductor/fx_passes/pre_grad.py b/torch/_inductor/fx_passes/pre_grad.py index af08de064405..7840de4662a5 100644 --- a/torch/_inductor/fx_passes/pre_grad.py +++ b/torch/_inductor/fx_passes/pre_grad.py @@ -99,7 +99,7 @@ def pre_grad_passes(gm: torch.fx.GraphModule, example_inputs): gm_after_fx_passes = gm.__copy__() numeric_check_if_enabled( - gm_before_fx_passes, + gm_before_fx_passes, # type: ignore[possibly-undefined] gm_after_fx_passes, example_inputs, config.fx_passes_numeric_check.get("num_iterations", 1), diff --git a/torch/_inductor/fx_passes/quantization.py b/torch/_inductor/fx_passes/quantization.py index 8896c653399f..45f2177a6722 100644 --- a/torch/_inductor/fx_passes/quantization.py +++ b/torch/_inductor/fx_passes/quantization.py @@ -1360,7 +1360,7 @@ def _register_qconv_weight_prepack_pass(pattern, pass_number, dtype=torch.float3 graph.erase_node(conv_node) # Erase the dequant pattern if dtype == torch.bfloat16: - graph.erase_node(convert_to_bf16) + graph.erase_node(convert_to_bf16) # type: ignore[possibly-undefined] # Erase the dequant pattern graph.erase_node(mul_node) graph.erase_node(sub_node) @@ -1369,7 +1369,7 @@ def _register_qconv_weight_prepack_pass(pattern, pass_number, dtype=torch.float3 if clone_node is not None: graph.erase_node(clone_node) if dtype == torch.bfloat16: - graph.erase_node(weight_to_bf16_node) + graph.erase_node(weight_to_bf16_node) # type: ignore[possibly-undefined] graph.erase_node(dequant_per_channel) counters["inductor"]["qconv2d_weight_prepack_matcher_count"] += 1 counters["inductor"]["qconv2d_weight_prepack_matcher_nodes"] += len( @@ -1697,14 
+1697,14 @@ def _register_qlinear_weight_prepack_pass( if input_contiguous: graph.erase_node(output_reshape_node) elif not input_contiguous and bias: - graph.erase_node(output_add_node_for_bias) + graph.erase_node(output_add_node_for_bias) # type: ignore[possibly-undefined] graph.erase_node(linear_node) if input_dim_exceeds_two: if input_contiguous: graph.erase_node(act_reshape_node) else: graph.erase_node(act_expand_node) - graph.erase_node(wgt_expand_node) + graph.erase_node(wgt_expand_node) # type: ignore[possibly-undefined] if dtype == torch.bfloat16: graph.erase_node(activation_to_bf16_node) # Erase the dequant pattern @@ -1714,7 +1714,7 @@ def _register_qlinear_weight_prepack_pass( # Erase the dequant per channel pattern graph.erase_node(t_node) if dtype == torch.bfloat16: - graph.erase_node(weight_to_bf16_node) + graph.erase_node(weight_to_bf16_node) # type: ignore[possibly-undefined] graph.erase_node(dequant_per_channel) counters["inductor"]["qlinear_weight_prepack_matcher_count"] += 1 diff --git a/torch/_inductor/graph.py b/torch/_inductor/graph.py index a14ff3f909c8..b197a82ba272 100644 --- a/torch/_inductor/graph.py +++ b/torch/_inductor/graph.py @@ -845,7 +845,7 @@ class GraphLowering(torch.fx.Interpreter): ): debug("fallback_handler") result = fallback_handler(n.target, add_to_fallback_set=False)( - *args, **kwargs + *args, **kwargs # type: ignore[possibly-undefined] ) elif n.op == "call_function" and n.target in layout_constraints: debug("layout_constraints") diff --git a/torch/_inductor/lowering.py b/torch/_inductor/lowering.py index 4b6e619591a5..d5ee49970d6c 100644 --- a/torch/_inductor/lowering.py +++ b/torch/_inductor/lowering.py @@ -607,7 +607,7 @@ def register_pointwise( fn, override_return_dtype=override_return_dtype, override_fn_when_input_bool=override_fn_when_input_bool, - override_fn_when_cuda_float64=fn_libdevice if use_libdevice_for_f64 else None, + override_fn_when_cuda_float64=fn_libdevice if use_libdevice_for_f64 else None, # type: ignore[possibly-undefined] allow_alpha=allow_alpha, ) fn = register_lowering( @@ -3622,8 +3622,8 @@ def _reflection_padnd_backward(grad_output, x, padding): out = right_reflect[i] index_range = (xyz[i], dhw[i] - padding_right[i], dhw[i] - 1) - outs.append(out) - index_ranges.append(index_range) + outs.append(out) # type: ignore[possibly-undefined] + index_ranges.append(index_range) # type: ignore[possibly-undefined] grad = accumulate(grad, outs, index_ranges) diff --git a/torch/_inductor/pattern_matcher.py b/torch/_inductor/pattern_matcher.py index d7f3ac43bc51..df744ce8bc6b 100644 --- a/torch/_inductor/pattern_matcher.py +++ b/torch/_inductor/pattern_matcher.py @@ -1196,7 +1196,7 @@ class PatternMatcherPass: if ( self.prevent_match_across_mutations and is_match(m) - and len(set(map(get_mutation_region_id_partial, m.nodes))) != 1 + and len(set(map(get_mutation_region_id_partial, m.nodes))) != 1 # type: ignore[possibly-undefined] ): continue if os.environ.get("TORCHINDUCTOR_PATTERN_MATCH_DEBUG") == node.name: diff --git a/torch/_inductor/scheduler.py b/torch/_inductor/scheduler.py index 82263fff5724..8ccc078987a8 100644 --- a/torch/_inductor/scheduler.py +++ b/torch/_inductor/scheduler.py @@ -1038,7 +1038,7 @@ class ForeachKernelSchedulerNode(FusedSchedulerNode): else: fused_nodes.append(node) - return cls(producer.scheduler, fused_nodes, prev_node_1, prev_node_2) + return cls(producer.scheduler, fused_nodes, prev_node_1, prev_node_2) # type: ignore[possibly-undefined] def __init__( self, @@ -2256,13 +2256,13 @@ class Scheduler: if 
node.is_template(): node, *epilogue = node.get_nodes() - self.get_backend(device).codegen_template(node, epilogue) + self.get_backend(device).codegen_template(node, epilogue) # type: ignore[possibly-undefined] elif node.is_extern(): self.codegen_extern_call(node) elif node.is_foreach(): - self.get_backend(device).codegen_foreach(node) + self.get_backend(device).codegen_foreach(node) # type: ignore[possibly-undefined] elif isinstance(node, (FusedSchedulerNode, SchedulerNode)): - self.get_backend(device).codegen_nodes(node.get_nodes()) + self.get_backend(device).codegen_nodes(node.get_nodes()) # type: ignore[possibly-undefined] else: assert isinstance(node, NopKernelSchedulerNode) node.allocate() @@ -2271,7 +2271,7 @@ class Scheduler: V.graph.wrapper_code.generate_inf_and_nan_checker(node) if config.triton.debug_sync_kernel: - self.get_backend(device).codegen_sync() + self.get_backend(device).codegen_sync() # type: ignore[possibly-undefined] self.available_buffer_names.update(node.get_names()) diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index 8c36dc010297..6d95213db8ec 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -331,7 +331,7 @@ def timed( synchronize(device) t1 = time.perf_counter() # GC the result after timing - assert result is not None + assert result is not None # type: ignore[possibly-undefined] return t1 - t0 diff --git a/torch/_meta_registrations.py b/torch/_meta_registrations.py index 0917e9e75dbf..39470e4b6e67 100644 --- a/torch/_meta_registrations.py +++ b/torch/_meta_registrations.py @@ -1147,7 +1147,7 @@ def _parse_qr_mode(mode: str) -> Tuple[bool, bool]: f"but expected one of 'reduced' (default), 'r', or 'complete'" ), ) - return compute_q, reduced + return compute_q, reduced # type: ignore[possibly-undefined] @register_meta([aten.linalg_qr.default, aten.linalg_qr.out]) @@ -1412,7 +1412,7 @@ def triangular_solve_meta( cloned_coefficient = self.new_empty([0]) else: torch._check(False, lambda: "triangular_solve: Got an unexpected layout.") - return solution, cloned_coefficient + return solution, cloned_coefficient # type: ignore[possibly-undefined] # From aten/src/ATen/native/LinearAlgebra.cpp @@ -1809,7 +1809,7 @@ def _pad3d_common(input, padding, *, is_reflection): ) if batch_mode: - return input.new_empty((nbatch, nplane, output_d, output_h, output_w)) + return input.new_empty((nbatch, nplane, output_d, output_h, output_w)) # type: ignore[possibly-undefined] else: return input.new_empty((nplane, output_d, output_h, output_w)) diff --git a/torch/_prims/__init__.py b/torch/_prims/__init__.py index e25aec08a266..5b505d3e87d0 100644 --- a/torch/_prims/__init__.py +++ b/torch/_prims/__init__.py @@ -246,10 +246,10 @@ def TensorMeta( assert dtype is not None assert device is not None - shape = inferred_shape if shape is None else tuple(shape) - strides = inferred_strides if strides is None else tuple(strides) - dtype = inferred_dtype if dtype is None else dtype - device = inferred_device if device is None else device + shape = inferred_shape if shape is None else tuple(shape) # type: ignore[possibly-undefined] + strides = inferred_strides if strides is None else tuple(strides) # type: ignore[possibly-undefined] + dtype = inferred_dtype if dtype is None else dtype # type: ignore[possibly-undefined] + device = inferred_device if device is None else device # type: ignore[possibly-undefined] if isinstance(device, str): device = torch.device(device) diff --git a/torch/_refs/__init__.py b/torch/_refs/__init__.py index 
7af5d43adde6..40e78a1f1826 100644 --- a/torch/_refs/__init__.py +++ b/torch/_refs/__init__.py @@ -4875,16 +4875,16 @@ def arange( # other integral dtypes we don't. Weird... but needed to match ATen shapes. if dtype == torch.int64: # Uses floordiv to avoid ceil in inductor. - sgn = bool(xstep > 0) - bool(xstep < 0) - length = (xend - xstart + xstep - sgn) // xstep + sgn = bool(xstep > 0) - bool(xstep < 0) # type: ignore[possibly-undefined] + length = (xend - xstart + xstep - sgn) // xstep # type: ignore[possibly-undefined] else: length = math.ceil((end - start) / step) if is_integer: return prims.iota( length, - start=xstart, - step=xstep, + start=xstart, # type: ignore[possibly-undefined] + step=xstep, # type: ignore[possibly-undefined] dtype=dtype, device=device, requires_grad=requires_grad, diff --git a/torch/_refs/fft.py b/torch/_refs/fft.py index cc2cae10fb0d..df0c34be2619 100644 --- a/torch/_refs/fft.py +++ b/torch/_refs/fft.py @@ -312,7 +312,7 @@ def _canonicalize_fft_shape_and_dim_args( # Translate any -1 values in shape to the default length ret_shape = tuple( - s if s != -1 else input_sizes[d] for (s, d) in zip(shape, ret_dims) + s if s != -1 else input_sizes[d] for (s, d) in zip(shape, ret_dims) # type: ignore[possibly-undefined] ) elif dim is None: # No shape, no dim @@ -320,12 +320,12 @@ def _canonicalize_fft_shape_and_dim_args( ret_shape = tuple(input_sizes) else: # No shape, has dim - ret_shape = tuple(input_sizes[d] for d in ret_dims) + ret_shape = tuple(input_sizes[d] for d in ret_dims) # type: ignore[possibly-undefined] for n in ret_shape: torch._check(n > 0, lambda: f"Invalid number of data points ({n}) specified") - return _ShapeAndDims(shape=ret_shape, dims=ret_dims) + return _ShapeAndDims(shape=ret_shape, dims=ret_dims) # type: ignore[possibly-undefined] def _prod(xs: Iterable[int]) -> int: diff --git a/torch/_tensor_str.py b/torch/_tensor_str.py index 1293a0fd61ae..624db452d6ec 100644 --- a/torch/_tensor_str.py +++ b/torch/_tensor_str.py @@ -610,7 +610,7 @@ def _str_intern(inp, *, tensor_contents=None): # no-grad mode. See: https://github.com/pytorch/pytorch/issues/99968 grad_fn_name = "Invalid" - if grad_fn_name is None and grad_fn is not None: + if grad_fn_name is None and grad_fn is not None: # type: ignore[possibly-undefined] grad_fn_name = type(grad_fn).__name__ if grad_fn_name == "CppFunction": grad_fn_name = grad_fn.name().rsplit("::", 1)[-1] @@ -627,7 +627,7 @@ def _str_intern(inp, *, tensor_contents=None): suffixes.append(f"tangent={tangent}") string_repr = _add_suffixes( - prefix + tensor_str, suffixes, indent, force_newline=self.is_sparse + prefix + tensor_str, suffixes, indent, force_newline=self.is_sparse # type: ignore[possibly-undefined] ) # Check if this instance is flagged as a parameter and change the repr accordingly. 
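For context, mypy's `possibly-undefined` error code (turned on above via `enable_error_code = possibly-undefined` in mypy.ini) flags any name that is bound on only some control-flow paths before it is later read. Most hunks in this diff follow the same shape, so here is a minimal standalone sketch of the pattern and of the per-line suppression the patch applies; `scale`, `normalize`, and `denom` are invented names used only for illustration and are not part of the patch.

# Hypothetical example (not from this patch) of code that is correct at
# runtime but that mypy cannot prove safe once possibly-undefined is enabled.
import torch

def scale(x: torch.Tensor, normalize: bool) -> torch.Tensor:
    if normalize:
        denom = x.abs().max()  # `denom` is bound only on this branch
    # ... unrelated work ...
    if normalize:
        # mypy cannot see that this branch implies the branch above already
        # ran, so it reports an error like:
        #   Name "denom" may be undefined  [possibly-undefined]
        x = x / denom  # type: ignore[possibly-undefined]
    return x

Files that exercise such patterns on purpose, such as the typing tests near the top of this diff, instead opt out wholesale with a file-level `# mypy: disable-error-code="possibly-undefined"` comment on their first line, which is the second suppression mechanism the patch uses.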
diff --git a/torch/ao/nn/intrinsic/qat/modules/conv_fused.py b/torch/ao/nn/intrinsic/qat/modules/conv_fused.py index 00d454e70a44..906206e18e64 100644 --- a/torch/ao/nn/intrinsic/qat/modules/conv_fused.py +++ b/torch/ao/nn/intrinsic/qat/modules/conv_fused.py @@ -188,7 +188,7 @@ class _ConvBnNd(nn.modules.conv._ConvNd, nni._FusedModule): if self.bn.training: avg_dims = [0] + list(range(2, len(self.weight.shape))) - batch_mean = conv_out.mean(avg_dims) + batch_mean = conv_out.mean(avg_dims) # type: ignore[possibly-undefined] batch_var = torch.square(conv_out - batch_mean.reshape(bias_shape)).mean( avg_dims ) diff --git a/torch/ao/nn/quantized/modules/activation.py b/torch/ao/nn/quantized/modules/activation.py index 74b518dea4c2..6fcd223e5049 100644 --- a/torch/ao/nn/quantized/modules/activation.py +++ b/torch/ao/nn/quantized/modules/activation.py @@ -231,7 +231,7 @@ class MultiheadAttention(torch.ao.nn.quantizable.MultiheadAttention): if converted.bias_v is not None: bias_v = converted._parameters.pop('bias_v') - sc, zp = torch._choose_qparams_per_tensor(bias_k, + sc, zp = torch._choose_qparams_per_tensor(bias_k, # type: ignore[possibly-undefined] reduce_range=False) bias_v = torch.quantize_per_tensor(bias_v, sc, zp, torch.quint8) setattr(converted, 'bias_v', bias_v) # noqa: B010 diff --git a/torch/ao/nn/quantized/modules/linear.py b/torch/ao/nn/quantized/modules/linear.py index 213934e62962..a6c720e9b609 100644 --- a/torch/ao/nn/quantized/modules/linear.py +++ b/torch/ao/nn/quantized/modules/linear.py @@ -24,7 +24,7 @@ class LinearPackedParams(torch.nn.Module): wq = torch._empty_affine_quantized([1, 1], scale=1.0, zero_point=0, dtype=torch.qint8) elif self.dtype == torch.float16: wq = torch.zeros([1, 1], dtype=torch.float) - self.set_weight_bias(wq, None) + self.set_weight_bias(wq, None) # type: ignore[possibly-undefined] @torch.jit.export def set_weight_bias(self, weight: torch.Tensor, bias: Optional[torch.Tensor]) -> None: diff --git a/torch/ao/nn/quantized/reference/modules/rnn.py b/torch/ao/nn/quantized/reference/modules/rnn.py index 9f44667c270b..4120338ce271 100644 --- a/torch/ao/nn/quantized/reference/modules/rnn.py +++ b/torch/ao/nn/quantized/reference/modules/rnn.py @@ -435,7 +435,7 @@ class LSTM(RNNBase): hx = (h_zeros, c_zeros) else: if batch_sizes is None: # If not PackedSequence input. 
- if is_batched: + if is_batched: # type: ignore[possibly-undefined] if (hx[0].dim() != 3 or hx[1].dim() != 3): msg = ("For batched 3-D input, hx and cx should " f"also be 3-D but got ({hx[0].dim()}-D, {hx[1].dim()}-D) tensors") @@ -465,8 +465,8 @@ class LSTM(RNNBase): output_packed = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices) return output_packed, self.permute_hidden(hidden, unsorted_indices) else: - if not is_batched: - output = output.squeeze(batch_dim) + if not is_batched: # type: ignore[possibly-undefined] + output = output.squeeze(batch_dim) # type: ignore[possibly-undefined] hidden = (hidden[0].squeeze(1), hidden[1].squeeze(1)) return output, self.permute_hidden(hidden, unsorted_indices) @@ -589,8 +589,8 @@ class GRU(RNNBase): output_packed = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices) return output_packed, self.permute_hidden(hidden, unsorted_indices) else: - if not is_batched: - output = output.squeeze(batch_dim) + if not is_batched: # type: ignore[possibly-undefined] + output = output.squeeze(batch_dim) # type: ignore[possibly-undefined] hidden = hidden.squeeze(1) return output, self.permute_hidden(hidden, unsorted_indices) diff --git a/torch/ao/ns/fx/graph_passes.py b/torch/ao/ns/fx/graph_passes.py index edd5284cf6eb..fbd03426790d 100644 --- a/torch/ao/ns/fx/graph_passes.py +++ b/torch/ao/ns/fx/graph_passes.py @@ -759,7 +759,7 @@ def create_a_shadows_b( continue fqn_base_a = _maybe_get_fqn(subgraph_a.base_op_node, gm_a) - fqn_base_b = _maybe_get_fqn(subgraph_b.base_op_node, gm_b) + fqn_base_b = _maybe_get_fqn(subgraph_b.base_op_node, gm_b) # type: ignore[possibly-undefined] if node_b_is_start_node: @@ -817,7 +817,7 @@ def create_a_shadows_b( # cast dtype from the dtype of node_c's input to the dtype of # node_a's input (dequant, etc) # prev_node_c = node_c.args[0] - prev_node_c = get_normalized_nth_input(node_c, gm_b, 0) + prev_node_c = get_normalized_nth_input(node_c, gm_b, 0) # type: ignore[possibly-undefined] if should_log_inputs: # skip the input logger when inserting a dtype cast if isinstance(prev_node_c, Node): @@ -901,7 +901,7 @@ def create_a_shadows_b( # input_logger = env_c[dtype_cast_node.name] # Find the first node in the subgraph cur_node = node_a_shadows_c - while get_normalized_nth_input(cur_node, gm_b, 0) != input_logger: + while get_normalized_nth_input(cur_node, gm_b, 0) != input_logger: # type: ignore[possibly-undefined] cur_node = get_normalized_nth_input(cur_node, gm_b, 0) # type: ignore[assignment] if isinstance(input_logger, Node): input_logger_mod = getattr(gm_b, input_logger.name) diff --git a/torch/ao/ns/fx/n_shadows_utils.py b/torch/ao/ns/fx/n_shadows_utils.py index 297d9b878cd2..b7eddf93e2ae 100644 --- a/torch/ao/ns/fx/n_shadows_utils.py +++ b/torch/ao/ns/fx/n_shadows_utils.py @@ -92,7 +92,7 @@ class OutputProp: elif node.op == 'call_module': result = self.modules[node.target](*load_arg(node.args), **load_arg(node.kwargs)) - if isinstance(result, torch.Tensor): + if isinstance(result, torch.Tensor): # type: ignore[possibly-undefined] node.traced_result = result env[node.name] = result @@ -375,7 +375,7 @@ def create_submodule_from_subgraph( # TODO(future PR): this is ignoring kwargs, will need to support kwargs # for any fusion pattern which has them for a node that is not the # first node. 
- cur_args_copy = [cur_node_copy] # type: ignore[has-type] # noqa: F821 + cur_args_copy = [cur_node_copy] # type: ignore[has-type, possibly-undefined] # noqa: F821 if len(cur_node_orig.args) > 1: for arg in cur_node_orig.args[1:]: @@ -399,15 +399,15 @@ def create_submodule_from_subgraph( mod_name = f"mod_{cur_name_idx}" setattr(gm, mod_name, orig_mod_copy) cur_name_idx += 1 - cur_node_copy = g.call_module(mod_name, cur_args_copy, cur_kwargs_copy) + cur_node_copy = g.call_module(mod_name, cur_args_copy, cur_kwargs_copy) # type: ignore[possibly-undefined] elif cur_node_orig.op == 'call_function': cur_node_copy = g.call_function( - cur_node_orig.target, cur_args_copy, cur_kwargs_copy) + cur_node_orig.target, cur_args_copy, cur_kwargs_copy) # type: ignore[possibly-undefined] elif cur_node_orig.op == 'call_method': cur_node_copy = g.call_method( - cur_node_orig.target, cur_args_copy, cur_kwargs_copy) + cur_node_orig.target, cur_args_copy, cur_kwargs_copy) # type: ignore[possibly-undefined] else: raise AssertionError(f'{cur_node_orig.op} not supported yet') diff --git a/torch/ao/pruning/_experimental/activation_sparsifier/activation_sparsifier.py b/torch/ao/pruning/_experimental/activation_sparsifier/activation_sparsifier.py index c336799c6225..7c03a9f6e36a 100644 --- a/torch/ao/pruning/_experimental/activation_sparsifier/activation_sparsifier.py +++ b/torch/ao/pruning/_experimental/activation_sparsifier/activation_sparsifier.py @@ -402,7 +402,7 @@ class ActivationSparsifier: hook = layer.register_forward_pre_hook(self._sparsify_hook(name)) config['layer'] = layer - config['hook'] = hook + config['hook'] = hook # type: ignore[possibly-undefined] def __repr__(self): format_string = self.__class__.__name__ + ' (' diff --git a/torch/ao/pruning/_experimental/pruner/prune_functions.py b/torch/ao/pruning/_experimental/pruner/prune_functions.py index 8278ec642e9d..a75c09cc30f8 100644 --- a/torch/ao/pruning/_experimental/pruner/prune_functions.py +++ b/torch/ao/pruning/_experimental/pruner/prune_functions.py @@ -117,7 +117,7 @@ def _prune_linear_helper(linear: nn.Linear) -> Tensor: with torch.no_grad(): parametrize.remove_parametrizations(linear, "weight", leave_parametrized=True) - linear.weight = nn.Parameter(linear.weight[mask]) + linear.weight = nn.Parameter(linear.weight[mask]) # type: ignore[possibly-undefined] linear.out_features = linear.weight.shape[0] _remove_bias_handles(linear) @@ -175,7 +175,7 @@ def _prune_conv2d_helper(conv2d: nn.Conv2d) -> Tensor: with torch.no_grad(): parametrize.remove_parametrizations(conv2d, "weight", leave_parametrized=True) - conv2d.weight = nn.Parameter(conv2d.weight[mask]) + conv2d.weight = nn.Parameter(conv2d.weight[mask]) # type: ignore[possibly-undefined] conv2d.out_channels = conv2d.weight.shape[0] _remove_bias_handles(conv2d) @@ -197,7 +197,7 @@ def prune_conv2d_padded(conv2d_1: nn.Conv2d) -> None: conv2d_1.bias is not None ): # conv2d_1 has original bias and bias propagated from previous layer new_bias = torch.zeros(conv2d_1.bias.shape) - new_bias[mask] = conv2d_1.bias[mask] + new_bias[mask] = conv2d_1.bias[mask] # type: ignore[possibly-undefined] # adjusted bias that to keep in conv2d_1 new_bias[~mask] = cast(Tensor, conv2d_1._bias)[~mask] # pruned biases that are kept instead of propagated @@ -209,7 +209,7 @@ def prune_conv2d_padded(conv2d_1: nn.Conv2d) -> None: if ( conv2d_1.bias is not None ): # conv2d_1 has bias propagated from previous layer - conv2d_1.bias.data[~mask] = 0 + conv2d_1.bias.data[~mask] = 0 # type: ignore[possibly-undefined] if 
hasattr(conv2d_1, "_bias"): delattr(conv2d_1, "_bias") diff --git a/torch/ao/quantization/fx/prepare.py b/torch/ao/quantization/fx/prepare.py index a8db114b2b48..aba802f01c64 100644 --- a/torch/ao/quantization/fx/prepare.py +++ b/torch/ao/quantization/fx/prepare.py @@ -835,7 +835,7 @@ def _maybe_insert_input_observer_for_arg_or_kwarg( maybe_obs_mod = named_modules[maybe_obs_node.target] # type: ignore[index] if ( type(maybe_obs_mod) == type(arg_as_input_act_obs_or_fq) and - maybe_obs_mod.dtype == arg_as_input_target_dtype + maybe_obs_mod.dtype == arg_as_input_target_dtype # type: ignore[possibly-undefined] ): arg_as_input_act_obs_or_fq = maybe_obs_mod # type: ignore[assignment] existing_obs_node = maybe_obs_node diff --git a/torch/autograd/graph.py b/torch/autograd/graph.py index 4d197e3371c1..42922026e7da 100644 --- a/torch/autograd/graph.py +++ b/torch/autograd/graph.py @@ -516,7 +516,7 @@ def register_multi_grad_hook( if tensor.requires_grad ) - return Handle(handles) + return Handle(handles) # type: ignore[possibly-undefined] # NOTE [Allow mutation on tensors saved for backward] @@ -746,4 +746,4 @@ def _engine_run_backward(t_outputs, *args, **kwargs): ) # Calls into the C++ engine to run the backward pass finally: if attach_logging_hooks: - unregister_hooks() + unregister_hooks() # type: ignore[possibly-undefined] diff --git a/torch/autograd/profiler_util.py b/torch/autograd/profiler_util.py index 331b5c77f659..0d7388b1316c 100644 --- a/torch/autograd/profiler_util.py +++ b/torch/autograd/profiler_util.py @@ -1148,7 +1148,7 @@ def _build_table( if evt.flops <= 0: row_values.append("--") else: - row_values.append(f"{evt.flops * flops_scale:8.3f}") + row_values.append(f"{evt.flops * flops_scale:8.3f}") # type: ignore[possibly-undefined] if has_stack: src_field = "" if len(evt.stack) > 0: diff --git a/torch/backends/_nnapi/serializer.py b/torch/backends/_nnapi/serializer.py index 748132eb07d6..960f4091723d 100644 --- a/torch/backends/_nnapi/serializer.py +++ b/torch/backends/_nnapi/serializer.py @@ -1176,7 +1176,7 @@ class _NnapiSerializer: shape=change_element(out_oper.shape, dim, out_dim_size) ) - if in_oper.dim_order == DimOrder.CHANNELS_LAST: + if in_oper.dim_order == DimOrder.CHANNELS_LAST: # type: ignore[possibly-undefined] assert len(out_oper.shape) == 4 nnapi_dim = [0, 3, 1, 2][dim] else: @@ -1633,10 +1633,10 @@ class _NnapiSerializer: size_ctype, size_arg = self.get_constant_value(size_jit) if node.inputsSize() == 3: - scale_ctype, scale_arg = self.get_constant_value(scale_jit) + scale_ctype, scale_arg = self.get_constant_value(scale_jit) # type: ignore[possibly-undefined] else: - scale_h_ctype, scale_h_arg = self.get_constant_value(scale_h_jit) - scale_w_ctype, scale_w_arg = self.get_constant_value(scale_w_jit) + scale_h_ctype, scale_h_arg = self.get_constant_value(scale_h_jit) # type: ignore[possibly-undefined] + scale_w_ctype, scale_w_arg = self.get_constant_value(scale_w_jit) # type: ignore[possibly-undefined] # The only way for the 4-argument overload of upsample_nearest2d to # have been added to the graph without error is if the scale_h and diff --git a/torch/cuda/graphs.py b/torch/cuda/graphs.py index 16c635247eec..563450e58e17 100644 --- a/torch/cuda/graphs.py +++ b/torch/cuda/graphs.py @@ -325,7 +325,7 @@ def make_graphed_callables( only_inputs=True, allow_unused=allow_unused_input, ) - del outputs, grad_inputs + del outputs, grad_inputs # type: ignore[possibly-undefined] torch.cuda.synchronize() # All captures here share a mempool. 
To avoid replays corrupting each other's memory, diff --git a/torch/distributed/_shard/sharding_spec/_internals.py b/torch/distributed/_shard/sharding_spec/_internals.py index d9de0b985132..e8275063e038 100644 --- a/torch/distributed/_shard/sharding_spec/_internals.py +++ b/torch/distributed/_shard/sharding_spec/_internals.py @@ -206,4 +206,4 @@ def get_chunk_sharding_params(sharding_dim_size, world_size, spec, rank): start_pos = current_offsets break current_offsets += chunk_size - return start_pos, chunk_size + return start_pos, chunk_size # type: ignore[possibly-undefined] diff --git a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py index 1fa474e03426..2d6ea1d705d5 100644 --- a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py +++ b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py @@ -395,7 +395,7 @@ def _handle_row_wise_sharding( result = torch.nn.functional.embedding_bag( lookup_input, torch.cat([local_shard, padding_row]), - offsets=offsets_list if offsets is not None else offsets, + offsets=offsets_list if offsets is not None else offsets, # type: ignore[possibly-undefined] mode=mode if mode != "mean" else "sum", per_sample_weights=per_sample_weights, max_norm=max_norm, diff --git a/torch/distributed/_spmd/data_parallel.py b/torch/distributed/_spmd/data_parallel.py index 6bb9bd69e51b..80ad107b794f 100644 --- a/torch/distributed/_spmd/data_parallel.py +++ b/torch/distributed/_spmd/data_parallel.py @@ -541,7 +541,7 @@ def mark_data_parallel_shardings( # mark activation as sharded on batch dim node_sharding = node_strategies[0] - node.meta["sharding"] = node_sharding + node.meta["sharding"] = node_sharding # type: ignore[possibly-undefined] placeholder_idx += 1 elif node.op == "call_function": diff --git a/torch/distributed/_spmd/gm_transformation.py b/torch/distributed/_spmd/gm_transformation.py index f65f786e3da6..ea2be4bb36ce 100644 --- a/torch/distributed/_spmd/gm_transformation.py +++ b/torch/distributed/_spmd/gm_transformation.py @@ -45,7 +45,7 @@ class GraphModuleTransformation: "iter_graph_main_gm": iter_gm.main_gm.print_readable(False), "iter_graph_cleanup_gm": iter_gm.cleanup_gm.print_readable(False), }, - graph_folder, + graph_folder, # type: ignore[possibly-undefined] ) return iter_gm diff --git a/torch/distributed/_spmd/graph_optimization.py b/torch/distributed/_spmd/graph_optimization.py index 09128c2b91f7..10423fb55cd4 100644 --- a/torch/distributed/_spmd/graph_optimization.py +++ b/torch/distributed/_spmd/graph_optimization.py @@ -353,7 +353,7 @@ def _scatter_wait_result( gm.graph.node_replace_all_uses_with(orig_wait, wait_output_node) if last_split_reshape_node == split_node: - last_split_reshape_node = wait_output_node + last_split_reshape_node = wait_output_node # type: ignore[possibly-undefined] need_sort_nodes = sorted(need_sort_nodes, key=lambda node: node_indices[node]) gm.graph.move_after(need_sort_nodes, last_split_reshape_node) diff --git a/torch/distributed/_spmd/iter_graph_module.py b/torch/distributed/_spmd/iter_graph_module.py index 5d4a0daa7919..31243bebf91a 100644 --- a/torch/distributed/_spmd/iter_graph_module.py +++ b/torch/distributed/_spmd/iter_graph_module.py @@ -561,7 +561,7 @@ class IterGraph(fx.Graph): delete_user_cb, propagate_meta=propagate_meta, ) - return ret + return ret # type: ignore[possibly-undefined] def node_add_user(self, node: fx.Node, user: Any) -> None: for 
graph in self._all_graphs: @@ -607,8 +607,8 @@ class IterGraph(fx.Graph): "_foreach_add_", ): step_node = node - self.node_add_user(optim_node, output_node) - self.node_add_user(step_node, optim_node) + self.node_add_user(optim_node, output_node) # type: ignore[possibly-undefined] + self.node_add_user(step_node, optim_node) # type: ignore[possibly-undefined] def defunctionalize_optim(self) -> None: # TODO: remove this API after DCE is not used with IterGraph @@ -624,8 +624,8 @@ class IterGraph(fx.Graph): "_foreach_add_", ): step_node = node - optim_node.users.pop(output_node, None) - step_node.users.pop(optim_node, None) + optim_node.users.pop(output_node, None) # type: ignore[possibly-undefined] + step_node.users.pop(optim_node, None) # type: ignore[possibly-undefined] def freeze_cross_iter_movement(self) -> None: self._freeze_cross_iter_movement = True diff --git a/torch/distributed/_tensor/dispatch.py b/torch/distributed/_tensor/dispatch.py index f82e378c2098..4a5cc3a9090e 100644 --- a/torch/distributed/_tensor/dispatch.py +++ b/torch/distributed/_tensor/dispatch.py @@ -199,7 +199,7 @@ class OpDispatcher: if output_sharding.output_spec is None: if op_call == aten.equal.default: obj_list = [None for _ in range(dist.get_world_size())] - dist.all_gather_object(obj_list, local_results) + dist.all_gather_object(obj_list, local_results) # type: ignore[possibly-undefined] obj_list = list(filter(lambda x: x is not None, obj_list)) # perform reduce on the collection with AND op local_results = functools.reduce(operator.and_, obj_list, True) @@ -229,7 +229,7 @@ class OpDispatcher: assert len(out_dts) >= 1, "out variant should have at least one out arg" return tuple(out_dts) if len(out_dts) > 1 else out_dts[0] else: - return self.wrap(local_results, output_sharding.output_spec) + return self.wrap(local_results, output_sharding.output_spec) # type: ignore[possibly-undefined] @staticmethod def redistribute_local_args( diff --git a/torch/distributed/_tensor/placement_types.py b/torch/distributed/_tensor/placement_types.py index cf553e8f2d9b..a7582672382d 100644 --- a/torch/distributed/_tensor/placement_types.py +++ b/torch/distributed/_tensor/placement_types.py @@ -201,7 +201,7 @@ class Shard(Placement): ) if is_padded: - output = self._unpad_tensor(output, pad_sizes[my_coordinate[mesh_dim]]) + output = self._unpad_tensor(output, pad_sizes[my_coordinate[mesh_dim]]) # type: ignore[possibly-undefined] return output def _to_replicate_tensor( @@ -236,7 +236,7 @@ class Shard(Placement): group=(mesh, mesh_dim), ) if is_padded: - unpad_size = full_chunk_size * num_chunks - logical_dim_size + unpad_size = full_chunk_size * num_chunks - logical_dim_size # type: ignore[possibly-undefined] result = self._unpad_tensor(result, unpad_size) return result diff --git a/torch/distributed/benchmarks/benchmark_ddp_rpc.py b/torch/distributed/benchmarks/benchmark_ddp_rpc.py index b31b72729a9d..7294fce61ff3 100644 --- a/torch/distributed/benchmarks/benchmark_ddp_rpc.py +++ b/torch/distributed/benchmarks/benchmark_ddp_rpc.py @@ -69,7 +69,7 @@ class HybridModel(torch.nn.Module): # Make sure combined PS dimension is always bigger or equal than the FC input assert NUM_PS * EMBEDDING_DIM >= 512 dim_normalizer = int(NUM_PS * EMBEDDING_DIM / 512) - emb_lookups_reshaped = emb_lookups_cat.reshape( + emb_lookups_reshaped = emb_lookups_cat.reshape( # type: ignore[possibly-undefined] [emb_lookups_cat.shape[0] * dim_normalizer, 512] ) @@ -195,7 +195,7 @@ def _run_trainer(emb_rref_list, rank): # Throw away warm-up measurements 
measurements = measurements[WARMUP_CYCLES:] - return rank, measurements, batch_size + return rank, measurements, batch_size # type: ignore[possibly-undefined] def run_worker(rank, world_size): diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py index 7cc094cf0a02..29b8a1fdfab3 100644 --- a/torch/distributed/device_mesh.py +++ b/torch/distributed/device_mesh.py @@ -85,7 +85,7 @@ else: if cur_rank in mesh_1d: res_sub_mesh = sub_mesh - res_sub_mesh._dim_group_infos = [device_mesh._dim_group_infos[mesh_dim]] + res_sub_mesh._dim_group_infos = [device_mesh._dim_group_infos[mesh_dim]] # type: ignore[possibly-undefined] # Assign the current DeviceMesh as the parent of the child DeviceMesh. self.child_to_parent_mapping[res_sub_mesh] = device_mesh return res_sub_mesh diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 2bda05c33759..12b2cb242a13 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -1942,9 +1942,9 @@ def _coalescing_manager( work = group._end_coalescing(device) if async_ops: - cm.append(work) + cm.append(work) # type: ignore[possibly-undefined] else: - work.wait() + work.wait() # type: ignore[possibly-undefined] def batch_isend_irecv(p2p_op_list): @@ -2457,7 +2457,7 @@ def gather_object(obj, object_gather_list=None, dst=0, group=None): # All ranks call gather with equal-sized tensors. gather( input_tensor, - gather_list=output_tensors if my_rank == dst else None, + gather_list=output_tensors if my_rank == dst else None, # type: ignore[possibly-undefined] dst=dst, group=group, ) @@ -2556,7 +2556,7 @@ def broadcast_object_list(object_list, src=0, group=None, device=None): # Note: torch.cat will do an extra memory copy to the current device, if the tensor_list # has only one element, we can skip the copy. if my_rank == src: - if len(tensor_list) == 1: + if len(tensor_list) == 1: # type: ignore[possibly-undefined] object_tensor = tensor_list[0] else: object_tensor = torch.cat(tensor_list) @@ -2659,8 +2659,8 @@ def scatter_object_list( # Src rank broadcasts the maximum tensor size. This is because all ranks are # expected to call into scatter() with equal-sized tensors. 
if my_rank == src: - max_tensor_size = max(tensor_sizes) - for tensor in tensor_list: + max_tensor_size = max(tensor_sizes) # type: ignore[possibly-undefined] + for tensor in tensor_list: # type: ignore[possibly-undefined] tensor.resize_(max_tensor_size) else: max_tensor_size = torch.tensor([0], dtype=torch.long, device=pg_device) @@ -2670,7 +2670,7 @@ def scatter_object_list( output_tensor = torch.empty(max_tensor_size.item(), dtype=torch.uint8, device=pg_device) scatter( output_tensor, - scatter_list=None if my_rank != src else tensor_list, + scatter_list=None if my_rank != src else tensor_list, # type: ignore[possibly-undefined] src=src, group=group, ) @@ -2679,7 +2679,7 @@ def scatter_object_list( obj_tensor_size = torch.tensor([0], dtype=torch.long, device=pg_device) scatter( obj_tensor_size, - scatter_list=None if my_rank != src else tensor_sizes, + scatter_list=None if my_rank != src else tensor_sizes, # type: ignore[possibly-undefined] src=src, group=group, ) diff --git a/torch/distributed/elastic/events/api.py b/torch/distributed/elastic/events/api.py index a1fcca28c715..62f5d7500922 100644 --- a/torch/distributed/elastic/events/api.py +++ b/torch/distributed/elastic/events/api.py @@ -51,7 +51,7 @@ class Event: return data if isinstance(data, str): data_dict = json.loads(data) - data_dict["source"] = EventSource[data_dict["source"]] + data_dict["source"] = EventSource[data_dict["source"]] # type: ignore[possibly-undefined] return Event(**data_dict) def serialize(self) -> str: @@ -105,7 +105,7 @@ class RdzvEvent: return data if isinstance(data, str): data_dict = json.loads(data) - data_dict["node_state"] = NodeState[data_dict["node_state"]] + data_dict["node_state"] = NodeState[data_dict["node_state"]] # type: ignore[possibly-undefined] return RdzvEvent(**data_dict) def serialize(self) -> str: diff --git a/torch/distributed/elastic/metrics/api.py b/torch/distributed/elastic/metrics/api.py index d32bb33bee35..1499943c78d2 100644 --- a/torch/distributed/elastic/metrics/api.py +++ b/torch/distributed/elastic/metrics/api.py @@ -126,7 +126,7 @@ def prof(fn=None, group: str = "torchelastic"): put_metric(f"{key}.failure", 1, group) raise finally: - put_metric(f"{key}.duration.ms", get_elapsed_time_ms(start), group) + put_metric(f"{key}.duration.ms", get_elapsed_time_ms(start), group) # type: ignore[possibly-undefined] return result return wrapper @@ -164,7 +164,7 @@ def profile(group=None): publish_metric( group, f"{func.__name__}.duration.ms", - get_elapsed_time_ms(start_time), + get_elapsed_time_ms(start_time), # type: ignore[possibly-undefined] ) return result diff --git a/torch/distributed/elastic/rendezvous/c10d_rendezvous_backend.py b/torch/distributed/elastic/rendezvous/c10d_rendezvous_backend.py index faf260a3a6be..c73e971791a6 100644 --- a/torch/distributed/elastic/rendezvous/c10d_rendezvous_backend.py +++ b/torch/distributed/elastic/rendezvous/c10d_rendezvous_backend.py @@ -176,7 +176,7 @@ def _create_tcp_store(params: RendezvousParameters) -> TCPStore: "The connection to the C10d store has failed. See inner exception for details." 
) from exc - return store + return store # type: ignore[possibly-undefined] def _create_file_store(params: RendezvousParameters) -> FileStore: diff --git a/torch/distributed/elastic/rendezvous/etcd_server.py b/torch/distributed/elastic/rendezvous/etcd_server.py index e4de931c0f6b..a220f2ca5917 100644 --- a/torch/distributed/elastic/rendezvous/etcd_server.py +++ b/torch/distributed/elastic/rendezvous/etcd_server.py @@ -57,7 +57,7 @@ def find_free_port(): s.listen(0) return s except OSError as e: - s.close() + s.close() # type: ignore[possibly-undefined] print(f"Socket creation attempt failed: {e}") raise RuntimeError("Failed to create a socket") diff --git a/torch/distributed/fsdp/_flat_param.py b/torch/distributed/fsdp/_flat_param.py index 1929e4cb0dc1..c6806e47a5ca 100644 --- a/torch/distributed/fsdp/_flat_param.py +++ b/torch/distributed/fsdp/_flat_param.py @@ -1767,8 +1767,8 @@ class FlatParamHandle: ) flat_param.data = flat_param._local_shard # type: ignore[attr-defined] if self._use_orig_params: - if skip_use_sharded_views: - self._unsharded_flat_param_for_skipped_views = unsharded_flat_param + if skip_use_sharded_views: # type: ignore[possibly-undefined] + self._unsharded_flat_param_for_skipped_views = unsharded_flat_param # type: ignore[possibly-undefined] else: self._use_sharded_views() # For the post-forward reshard, we may try to use sharded gradient @@ -1776,7 +1776,7 @@ class FlatParamHandle: # in `no_sync()`), but for the post-backward reshard, we delay the # call to after the reduce-scatter. if ( - in_forward + in_forward # type: ignore[possibly-undefined] # Skip using gradient views if skipped using sharded views # since exposing unsharded parameters with sharded gradients # may be confusing to the user diff --git a/torch/distributed/fsdp/_init_utils.py b/torch/distributed/fsdp/_init_utils.py index 884236b14ef7..51be10971cc1 100644 --- a/torch/distributed/fsdp/_init_utils.py +++ b/torch/distributed/fsdp/_init_utils.py @@ -885,7 +885,7 @@ def _materialize_meta_module( warnings.warn( "Unable to call `reset_parameters()` for module on meta " f"device with error {str(e)}. Please ensure that your module of" - f"type {type(module)} implements a `reset_parameters()` method." + f"type {type(module)} implements a `reset_parameters()` method." 
# type: ignore[possibly-undefined]
         )
         raise e
@@ -994,7 +994,7 @@ def _move_states_to_device(
                     param.grad.data = param.grad.to(device_from_device_id)
         for buffer in buffers:
             buffer.data = buffer.to(device_from_device_id)
-    elif current_device == cpu_device:
+    elif current_device == cpu_device:  # type: ignore[possibly-undefined]
         _warn_cpu_init()
diff --git a/torch/distributed/fsdp/_optim_utils.py b/torch/distributed/fsdp/_optim_utils.py
index ccf134cea0d1..6e2525ce2af0 100644
--- a/torch/distributed/fsdp/_optim_utils.py
+++ b/torch/distributed/fsdp/_optim_utils.py
@@ -1419,7 +1419,7 @@ def _convert_all_state_info(
                 )
             gathered_state[name] = scalar_tensor_value

-    return dtype, state_buffers
+    return dtype, state_buffers  # type: ignore[possibly-undefined]


 def _unflatten_orig_param_states(
diff --git a/torch/distributed/tensor/parallel/_utils.py b/torch/distributed/tensor/parallel/_utils.py
index 9ec5c48be85e..a4ec7f42d44d 100644
--- a/torch/distributed/tensor/parallel/_utils.py
+++ b/torch/distributed/tensor/parallel/_utils.py
@@ -171,7 +171,7 @@ def _create_1d_device_mesh(device_mesh: DeviceMesh, tp_mesh_dim: int = 0) -> Dev
         if cur_rank in mesh_1d:
             res_sub_mesh = sub_mesh

-    res_sub_mesh._dim_group_infos = [device_mesh._dim_group_infos[tp_mesh_dim]]
+    res_sub_mesh._dim_group_infos = [device_mesh._dim_group_infos[tp_mesh_dim]]  # type: ignore[possibly-undefined]
     return res_sub_mesh
diff --git a/torch/export/exported_program.py b/torch/export/exported_program.py
index a2de965cc8d1..39ef1ad37640 100644
--- a/torch/export/exported_program.py
+++ b/torch/export/exported_program.py
@@ -253,7 +253,7 @@ class ExportedProgram:
                 user_args, self.call_spec.in_spec, exact_structural_match=True
             )  # type: ignore[assignment]
         except Exception:
-            _, received_spec = pytree.tree_flatten(user_args)
+            _, received_spec = pytree.tree_flatten(user_args)  # type: ignore[possibly-undefined]
             raise TypeError(  # noqa: TRY200
                 "Trying to flatten user inputs with exported input tree spec: \n"
                 f"{self.call_spec.in_spec}\n"
diff --git a/torch/fx/experimental/accelerator_partitioner.py b/torch/fx/experimental/accelerator_partitioner.py
index a09269eff510..c2caf933fd56 100644
--- a/torch/fx/experimental/accelerator_partitioner.py
+++ b/torch/fx/experimental/accelerator_partitioner.py
@@ -998,7 +998,7 @@ class Partitioner:
                 if cost < min_cost:
                     node_pair = [node, n1]
                     min_cost = cost
-            return cost, node_pair
+            return cost, node_pair  # type: ignore[possibly-undefined]

         # First use size_base_partition
         self.size_based_partition()
diff --git a/torch/fx/experimental/const_fold.py b/torch/fx/experimental/const_fold.py
index b5010c1e509f..548d1d3852b0 100644
--- a/torch/fx/experimental/const_fold.py
+++ b/torch/fx/experimental/const_fold.py
@@ -263,7 +263,7 @@ def split_const_subgraphs(
     setattr(
         split,
         fx_const_folded_attrs_name,
-        torch.nn.ParameterList() if multiple_outputs else torch.nn.Parameter(),
+        torch.nn.ParameterList() if multiple_outputs else torch.nn.Parameter(),  # type: ignore[possibly-undefined]
     )
     for node in split.graph.nodes:
         if node.op == "call_module" and node.target == const_mod_name:
diff --git a/torch/fx/experimental/sym_node.py b/torch/fx/experimental/sym_node.py
index 71ae7abd2e6e..1c9dcb6d7608 100644
--- a/torch/fx/experimental/sym_node.py
+++ b/torch/fx/experimental/sym_node.py
@@ -694,7 +694,7 @@ for name in math_op_names:
     fn.__qualname__ = fn.__name__ = priv_sympy_name
     setattr(current_module, priv_sympy_name, fn)

-del fn, name, priv_sympy_name
+del fn, name, priv_sympy_name  # type: ignore[possibly-undefined]


 def _sympy_abs(a):
@@ -753,7 +753,7 @@ for name in math_op_names:
     sym_name = f"sym_{name}"
     magic_methods[sym_name] = getattr(current_module, f"_sympy_{name}")

-del name, sym_name, math_op_names, current_module
+del name, sym_name, math_op_names, current_module  # type: ignore[possibly-undefined]


 def sympy_is_contiguous(sizes, strides):
diff --git a/torch/fx/experimental/unification/multipledispatch/conflict.py b/torch/fx/experimental/unification/multipledispatch/conflict.py
index fc6415194565..71db96dd476e 100644
--- a/torch/fx/experimental/unification/multipledispatch/conflict.py
+++ b/torch/fx/experimental/unification/multipledispatch/conflict.py
@@ -68,8 +68,8 @@ def consistent(a, b):
             p1 += 1
     # We only need to check for variadic ends
     # Variadic types are guaranteed to be the last element
-    return (isvariadic(cur_a) and p2 == len(b) or
-            isvariadic(cur_b) and p1 == len(a))
+    return (isvariadic(cur_a) and p2 == len(b) or  # type: ignore[possibly-undefined]
+            isvariadic(cur_b) and p1 == len(a))  # type: ignore[possibly-undefined]


 def ambiguous(a, b):
diff --git a/torch/fx/passes/net_min_base.py b/torch/fx/passes/net_min_base.py
index 3790acd34329..c03ec8ab7bb2 100644
--- a/torch/fx/passes/net_min_base.py
+++ b/torch/fx/passes/net_min_base.py
@@ -371,11 +371,11 @@ class _MinimizerBase:
         # Compare results
         names: Names = output_names
         if output_names is None:
-            names = [str(v) for v in result_key]
+            names = [str(v) for v in result_key]  # type: ignore[possibly-undefined]

         numeric_result, bool_result = self.compare_fn(a_result, b_result, names)

-        self.results[result_key] = numeric_result
+        self.results[result_key] = numeric_result  # type: ignore[possibly-undefined]
         report.append(f"Numerical accuracy = {numeric_result}")
         if not bool_result:
             report.append(f"Result mismatch for {result_key}")
diff --git a/torch/fx/passes/splitter_base.py b/torch/fx/passes/splitter_base.py
index e30d9d3e4154..3a493f4af335 100644
--- a/torch/fx/passes/splitter_base.py
+++ b/torch/fx/passes/splitter_base.py
@@ -575,7 +575,7 @@ class _SplitterBase:
                 else:
                     total_output_bytes += get_size_of_node(submod, node)[0]

-            map_arg(output_node.args, get_bytes)
+            map_arg(output_node.args, get_bytes)  # type: ignore[possibly-undefined]
             qps = self.PCIe_BW / max(total_input_bytes, total_output_bytes)
             reports += f"Total input size in bytes is {total_input_bytes}, total output size in bytes is {total_output_bytes},"
             reports += f" theoretical max qps (bounds by PCIe bandwidth) for this submodule is {qps}.\n"
diff --git a/torch/fx/subgraph_rewriter.py b/torch/fx/subgraph_rewriter.py
index da258758d0d4..04a3915a537d 100644
--- a/torch/fx/subgraph_rewriter.py
+++ b/torch/fx/subgraph_rewriter.py
@@ -305,7 +305,7 @@ def _replace_pattern(
                     first_user_node = n
                     break

-        with original_graph.inserting_before(first_user_node):
+        with original_graph.inserting_before(first_user_node):  # type: ignore[possibly-undefined]
             copied_returning_nodes = original_graph.graph_copy(replacement_graph, val_map)

         if isinstance(copied_returning_nodes, Node):
diff --git a/torch/hub.py b/torch/hub.py
index 794d5c703436..cf6d36a0550a 100644
--- a/torch/hub.py
+++ b/torch/hub.py
@@ -650,14 +650,14 @@ def download_url_to_file(url: str, dst: str, hash_prefix: Optional[str] = None,
                 buffer = u.read(READ_DATA_CHUNK)
                 if len(buffer) == 0:
                     break
-                f.write(buffer)
+                f.write(buffer)  # type: ignore[possibly-undefined]
                 if hash_prefix is not None:
-                    sha256.update(buffer)
+                    sha256.update(buffer)  # type: ignore[possibly-undefined]
                 pbar.update(len(buffer))

         f.close()
         if hash_prefix is not None:
-            digest = sha256.hexdigest()
+            digest = sha256.hexdigest()  # type: ignore[possibly-undefined]
             if digest[:len(hash_prefix)] != hash_prefix:
                 raise RuntimeError(f'invalid hash value (expected "{hash_prefix}", got "{digest}")')
         shutil.move(f.name, dst)
diff --git a/torch/jit/_fuser.py b/torch/jit/_fuser.py
index 61408af0d727..0ca9cb686010 100644
--- a/torch/jit/_fuser.py
+++ b/torch/jit/_fuser.py
@@ -70,8 +70,8 @@ def fuser(name):
         yield
     finally:
         if name in ["fuser1", "fuser3"]:  # NNC or oneDNN Graph
-            torch._C._jit_set_profiling_executor(old_profiling_executor)
-            torch._C._get_graph_executor_optimize(old_profiling_mode)
+            torch._C._jit_set_profiling_executor(old_profiling_executor)  # type: ignore[possibly-undefined]
+            torch._C._get_graph_executor_optimize(old_profiling_mode)  # type: ignore[possibly-undefined]
         # recover the previous values
         torch._C._jit_override_can_fuse_on_cpu(old_cpu_fuse)
         torch._C._jit_override_can_fuse_on_gpu(old_gpu_fuse)
diff --git a/torch/jit/_trace.py b/torch/jit/_trace.py
index a7052ae4bb01..799d158846f2 100644
--- a/torch/jit/_trace.py
+++ b/torch/jit/_trace.py
@@ -254,7 +254,7 @@ def verify(model, args, loss_fn=torch.sum, devices=None):
         if assert_compiled:
             hits = compiled_fn.hits
         out = model(*args)
-        if assert_compiled and compiled_fn.hits == hits:
+        if assert_compiled and compiled_fn.hits == hits:  # type: ignore[possibly-undefined]
             raise RuntimeError("failed to use the compiled function")
         if not isinstance(out, tuple):
             out = (out,)
@@ -280,7 +280,7 @@ def verify(model, args, loss_fn=torch.sum, devices=None):
         assert model.has_trace_for(*args)

     if is_module:
-        model.load_state_dict(saved_state)
+        model.load_state_dict(saved_state)  # type: ignore[possibly-undefined]
     compiled_outs, compiled_grads = run_fwd_bwd(args, assert_compiled=True)

     _verify_equal(uncompiled_outs, compiled_outs)
diff --git a/torch/masked/_ops.py b/torch/masked/_ops.py
index ba42719e19fa..2a2ff3fd6f85 100644
--- a/torch/masked/_ops.py
+++ b/torch/masked/_ops.py
@@ -1627,7 +1627,7 @@ def _std_var(
         total = sum(x * x.conj(), dim, keepdim=keepdim, dtype=compute_dtype)
     else:
         total = sum(
-            x * x.conj(), dim, keepdim=keepdim, dtype=compute_dtype, mask=inmask
+            x * x.conj(), dim, keepdim=keepdim, dtype=compute_dtype, mask=inmask  # type: ignore[possibly-undefined]
         )
     if not keepdim:
         count = count.reshape(total.shape)
diff --git a/torch/nn/modules/batchnorm.py b/torch/nn/modules/batchnorm.py
index 778fae7137e2..0eac5cef2daf 100644
--- a/torch/nn/modules/batchnorm.py
+++ b/torch/nn/modules/batchnorm.py
@@ -781,8 +781,8 @@ class SyncBatchNorm(_BatchNorm):
                 running_var,
                 self.eps,
                 exponential_average_factor,
-                process_group,
-                world_size,
+                process_group,  # type: ignore[possibly-undefined]
+                world_size,  # type: ignore[possibly-undefined]
             )

     @classmethod
diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py
index 93ea9c826fb8..082e643e6831 100644
--- a/torch/nn/modules/module.py
+++ b/torch/nn/modules/module.py
@@ -1604,9 +1604,9 @@ class Module:
             # For now only forward hooks have the always_call option but perhaps
             # this functionality should be added to full backward hooks as well.
             for hook_id, hook in _global_forward_hooks.items():
-                if hook_id in _global_forward_hooks_always_called and hook_id not in called_always_called_hooks:
+                if hook_id in _global_forward_hooks_always_called and hook_id not in called_always_called_hooks:  # type: ignore[possibly-undefined]
                     try:
-                        hook_result = hook(self, args, result)
+                        hook_result = hook(self, args, result)  # type: ignore[possibly-undefined]
                         if hook_result is not None:
                             result = hook_result
                     except Exception as e:
@@ -1615,12 +1615,12 @@ class Module:
                         continue

             for hook_id, hook in self._forward_hooks.items():
-                if hook_id in self._forward_hooks_always_called and hook_id not in called_always_called_hooks:
+                if hook_id in self._forward_hooks_always_called and hook_id not in called_always_called_hooks:  # type: ignore[possibly-undefined]
                     try:
                         if hook_id in self._forward_hooks_with_kwargs:
-                            hook_result = hook(self, args, kwargs, result)
+                            hook_result = hook(self, args, kwargs, result)  # type: ignore[possibly-undefined]
                         else:
-                            hook_result = hook(self, args, result)
+                            hook_result = hook(self, args, result)  # type: ignore[possibly-undefined]
                         if hook_result is not None:
                             result = hook_result
                     except Exception as e:
diff --git a/torch/nn/modules/rnn.py b/torch/nn/modules/rnn.py
index 5bcf321dfd7c..af1678d1204b 100644
--- a/torch/nn/modules/rnn.py
+++ b/torch/nn/modules/rnn.py
@@ -575,8 +575,8 @@ class RNN(RNNBase):
             output_packed = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
             return output_packed, self.permute_hidden(hidden, unsorted_indices)

-        if not is_batched:
-            output = output.squeeze(batch_dim)
+        if not is_batched:  # type: ignore[possibly-undefined]
+            output = output.squeeze(batch_dim)  # type: ignore[possibly-undefined]
             hidden = hidden.squeeze(1)

         return output, self.permute_hidden(hidden, unsorted_indices)
@@ -888,8 +888,8 @@ class LSTM(RNNBase):
             output_packed = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
             return output_packed, self.permute_hidden(hidden, unsorted_indices)
         else:
-            if not is_batched:
-                output = output.squeeze(batch_dim)
+            if not is_batched:  # type: ignore[possibly-undefined]
+                output = output.squeeze(batch_dim)  # type: ignore[possibly-undefined]
                 hidden = (hidden[0].squeeze(1), hidden[1].squeeze(1))
             return output, self.permute_hidden(hidden, unsorted_indices)
@@ -1111,8 +1111,8 @@ class GRU(RNNBase):
             output_packed = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
             return output_packed, self.permute_hidden(hidden, unsorted_indices)
         else:
-            if not is_batched:
-                output = output.squeeze(batch_dim)
+            if not is_batched:  # type: ignore[possibly-undefined]
+                output = output.squeeze(batch_dim)  # type: ignore[possibly-undefined]
                 hidden = hidden.squeeze(1)
             return output, self.permute_hidden(hidden, unsorted_indices)
diff --git a/torch/nn/utils/parametrizations.py b/torch/nn/utils/parametrizations.py
index e1dd018f58b8..2ccc45563745 100644
--- a/torch/nn/utils/parametrizations.py
+++ b/torch/nn/utils/parametrizations.py
@@ -105,7 +105,7 @@ class _Orthogonal(Module):
                 Q = self.base @ Q
             if transposed:
                 Q = Q.mT
-        return Q
+        return Q  # type: ignore[possibly-undefined]

     @torch.autograd.no_grad()
     def right_inverse(self, Q: torch.Tensor) -> torch.Tensor:
diff --git a/torch/onnx/_internal/jit_utils.py b/torch/onnx/_internal/jit_utils.py
index 9052961fc7a6..ab50eadc5ab6 100644
--- a/torch/onnx/_internal/jit_utils.py
+++ b/torch/onnx/_internal/jit_utils.py
@@ -293,7 +293,7 @@ def _create_node(
         for _ in range(1, n_outputs):
             node.addOutput()

-    node_ouputs = tuple(node.outputs())
+    node_ouputs = tuple(node.outputs())  # type: ignore[possibly-undefined]
     assert len(node_ouputs) == n_outputs

     aten = domain_op.startswith("aten::")
diff --git a/torch/onnx/symbolic_opset9.py b/torch/onnx/symbolic_opset9.py
index 8441fa01ccc2..81a6862ca476 100644
--- a/torch/onnx/symbolic_opset9.py
+++ b/torch/onnx/symbolic_opset9.py
@@ -1529,7 +1529,7 @@ def softmax(g: jit_utils.GraphContext, input, dim, dtype=None):
         )

     if is_transpose_required:
-        softmax = g.op("Transpose", softmax, perm_i=axes)
+        softmax = g.op("Transpose", softmax, perm_i=axes)  # type: ignore[possibly-undefined]
     return softmax

     # Apply max normalization.
@@ -2467,7 +2467,7 @@ def log_softmax(g: jit_utils.GraphContext, input, dim, dtype=None):
             "Cast", return_op, to_i=_type_utils.JitScalarType(parsed_dtype).onnx_type()
         )
     if is_transpose_required:
-        return_op = g.op("Transpose", return_op, perm_i=axes)
+        return_op = g.op("Transpose", return_op, perm_i=axes)  # type: ignore[possibly-undefined]
     return return_op
@@ -2978,7 +2978,7 @@ def native_layer_norm(
     # mean and normalized, so we need to Cast it back
     if is_type_half:
         denominator = g.op(
-            "Cast", denominator, to_i=_type_utils.JitScalarType(input_dtype).onnx_type()
+            "Cast", denominator, to_i=_type_utils.JitScalarType(input_dtype).onnx_type()  # type: ignore[possibly-undefined]
         )
         rdenominator = g.op("Reciprocal", denominator)
     else:
@@ -4754,7 +4754,7 @@ def _generic_rnn(
             reform_weights(g, w, hidden_size, reform_permutation) for w in weights
         )
         return tuple(
-            symbolic_helper._unsqueeze_helper(g, x, [0]) for x in (weight_ih, weight_hh)
+            symbolic_helper._unsqueeze_helper(g, x, [0]) for x in (weight_ih, weight_hh)  # type: ignore[possibly-undefined]
         )

     @_beartype.beartype
@@ -4766,10 +4766,10 @@ def _generic_rnn(
         weight_ih, weight_hh, bias_ih, bias_hh = (
             reform_weights(g, w, hidden_size, reform_permutation) for w in weights
         )
-        bias_concat = g.op("Concat", bias_ih, bias_hh, axis_i=0)
+        bias_concat = g.op("Concat", bias_ih, bias_hh, axis_i=0)  # type: ignore[possibly-undefined]
         return tuple(
             symbolic_helper._unsqueeze_helper(g, x, [0])
-            for x in (weight_ih, weight_hh, bias_concat)
+            for x in (weight_ih, weight_hh, bias_concat)  # type: ignore[possibly-undefined]
         )

     @_beartype.beartype
@@ -4808,16 +4808,16 @@ def _generic_rnn(

         inputs = [prev_output, weight_ih, weight_hh, bias_concat, sequence_lens]

-        inputs.append(retrieve_state(h0, *state_indices))
+        inputs.append(retrieve_state(h0, *state_indices))  # type: ignore[possibly-undefined]
         if variant == "LSTM":
-            inputs.append(retrieve_state(c0, *state_indices))
+            inputs.append(retrieve_state(c0, *state_indices))  # type: ignore[possibly-undefined]

         extra_kwargs = {} if unidirectional else {"direction_s": "bidirectional"}
         if variant == "RNN":
             if bidirectional:
-                activation = [nonlinearity, nonlinearity]
+                activation = [nonlinearity, nonlinearity]  # type: ignore[possibly-undefined]
             else:
-                activation = [nonlinearity]
+                activation = [nonlinearity]  # type: ignore[possibly-undefined]

             prev_output, h_out = g.op(
                 "RNN",
@@ -4859,17 +4859,17 @@ def _generic_rnn(
             else:
                 prev_output = symbolic_helper._squeeze_helper(g, prev_output, [1])

-            h_outs.append(h_out)
+            h_outs.append(h_out)  # type: ignore[possibly-undefined]
             if variant == "LSTM":
-                c_outs.append(c_out)
+                c_outs.append(c_out)  # type: ignore[possibly-undefined]
         if batch_first:
             # seq, batch, num_directions * hidden_size -> batch, seq, num_directions * hidden_size
             prev_output = g.op("Transpose", prev_output, perm_i=[1, 0, 2])
-        h_outs = h_out if num_layers == 1 else g.op("Concat", *h_outs, axis_i=0)
+        h_outs = h_out if num_layers == 1 else g.op("Concat", *h_outs, axis_i=0)  # type: ignore[possibly-undefined]
         if variant == "RNN" or variant == "GRU":
             return prev_output, h_outs
         elif variant == "LSTM":
-            c_outs = c_out if num_layers == 1 else g.op("Concat", *c_outs, axis_i=0)
+            c_outs = c_out if num_layers == 1 else g.op("Concat", *c_outs, axis_i=0)  # type: ignore[possibly-undefined]
             return prev_output, h_outs, c_outs
diff --git a/torch/profiler/_utils.py b/torch/profiler/_utils.py
index cb9469e4c983..783a69ea89ab 100644
--- a/torch/profiler/_utils.py
+++ b/torch/profiler/_utils.py
@@ -199,7 +199,7 @@ class BasicEvaluation:
             while (
                 current_kernel_index < len(cuda_kernel_events)
                 and (cuda_kernel_events[current_kernel_index].start_us()) * 1000
-                <= start_time
+                <= start_time  # type: ignore[possibly-undefined]
             ):
                 current_kernel_index += 1
             current_queue_depth = spawned_kernel_index - current_kernel_index + 1
@@ -207,7 +207,7 @@ class BasicEvaluation:

             if hasattr(event, "start_us"):
                 queue_depth_list.append(
-                    Interval(start_time, end_time, current_queue_depth)
+                    Interval(start_time, end_time, current_queue_depth)  # type: ignore[possibly-undefined]
                 )
             elif hasattr(event, "start_time_ns"):
                 self.metrics[EventKey(event)].queue_depth = current_queue_depth
diff --git a/torch/quantization/_quantized_conversions.py b/torch/quantization/_quantized_conversions.py
index ce1bfc8fd431..2b7670ea4802 100644
--- a/torch/quantization/_quantized_conversions.py
+++ b/torch/quantization/_quantized_conversions.py
@@ -44,7 +44,7 @@ def quantized_weight_reorder_for_mixed_dtypes_linear_cutlass(
     else:
         outp = weight

-    ncols, nrows = outp.shape
+    ncols, nrows = outp.shape  # type: ignore[possibly-undefined]
     assert nrows % (32 if dtypeq == torch.quint4x2 else 64) == 0
     assert ncols % 64 == 0
diff --git a/torch/sparse/_semi_structured_conversions.py b/torch/sparse/_semi_structured_conversions.py
index cad147e3c793..c487b151495d 100644
--- a/torch/sparse/_semi_structured_conversions.py
+++ b/torch/sparse/_semi_structured_conversions.py
@@ -134,11 +134,11 @@ def sparse_semi_structured_from_dense_cutlass(dense):
     idxs1 = bit2 | (bit3.to(torch.int64) << 1)

     if dense.dtype != torch.float:
-        sparse0 = dense_4.gather(-1, idxs0.unsqueeze(-1))
+        sparse0 = dense_4.gather(-1, idxs0.unsqueeze(-1))  # type: ignore[possibly-undefined]
         sparse1 = dense_4.gather(-1, idxs1.unsqueeze(-1))
         sparse = torch.stack((sparse0, sparse1), dim=-1).view(m, k // 2)
     else:
-        sparse = dense_2.gather(-1, idxs0.unsqueeze(-1) // 2).view(m, k // 2)
+        sparse = dense_2.gather(-1, idxs0.unsqueeze(-1) // 2).view(m, k // 2)  # type: ignore[possibly-undefined]

     meta_4 = idxs0 | (idxs1 << 2)
     meta_n = meta_4.view((-1, meta_ncols, quadbits_per_meta_elem)).to(meta_dtype)
@@ -163,7 +163,7 @@ def sparse_semi_structured_from_dense_cutlass(dense):
         )

     # Reorder meta tensor elements.
-    meta_reordered = meta.new_empty((m * meta_ncols,))
+    meta_reordered = meta.new_empty((m * meta_ncols,))  # type: ignore[possibly-undefined]
     meta_offsets = _calculate_meta_reordering_scatter_offsets(
         m, meta_ncols, meta_dtype, device
     )
diff --git a/torch/sparse/_triton_ops.py b/torch/sparse/_triton_ops.py
index c3ff60697dec..30fb91b91dcd 100644
--- a/torch/sparse/_triton_ops.py
+++ b/torch/sparse/_triton_ops.py
@@ -1662,7 +1662,7 @@ if has_triton():
         acc_block = tl.zeros((TILE_M, TILE_N), dtype=dot_out_dtype)

         if is_compressed:
-            A_ptr += r0 * blocks_stride_P
+            A_ptr += r0 * blocks_stride_P  # type: ignore[possibly-undefined]
         for _ in range(nnz):
             q = tl.load(q_ptr)
             B = tl.load(B_ptr + q)
@@ -1889,7 +1889,7 @@ if has_triton():

         # alpha is never 0
         if beta_is_nonzero:
-            output_acc_block = tl.load(input_ptrs).to(acc_dtype)
+            output_acc_block = tl.load(input_ptrs).to(acc_dtype)  # type: ignore[possibly-undefined]
             if not (beta_is_one and alpha_is_one):
                 beta_alpha = beta / alpha
                 output_acc_block *= beta_alpha
diff --git a/torch/utils/benchmark/examples/spectral_ops_fuzz_test.py b/torch/utils/benchmark/examples/spectral_ops_fuzz_test.py
index c70395573adb..3ac54059416c 100644
--- a/torch/utils/benchmark/examples/spectral_ops_fuzz_test.py
+++ b/torch/utils/benchmark/examples/spectral_ops_fuzz_test.py
@@ -76,7 +76,7 @@ def _output_csv(file, results):
         dim_str = str(dim)
         shape_str = 'x'.join(str(s) for s in shape)

-        print(name, device, measurement.task_spec.num_threads, numel, shape_str, contiguous, dim_str,
+        print(name, device, measurement.task_spec.num_threads, numel, shape_str, contiguous, dim_str,  # type: ignore[possibly-undefined]
               measurement.mean * 1e6, measurement.median * 1e6, measurement.iqr * 1e6,
               sep=',', file=file)
diff --git a/torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py b/torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py
index 13e951c1c4a3..5a3e9f635891 100644
--- a/torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py
+++ b/torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py
@@ -701,7 +701,7 @@ class _ValgrindWrapper:
                 if fn_match:
                     ir_str, file_function = fn_match.groups()
                     ir = int(ir_str.replace(",", ""))
-                    if ir == program_totals:
+                    if ir == program_totals:  # type: ignore[possibly-undefined]
                         # Callgrind includes some top level red herring symbols when
                         # a program dumps multiple profiles.
                         continue
diff --git a/torch/utils/checkpoint.py b/torch/utils/checkpoint.py
index 7dae01b4bc4d..aa2c2c513c24 100644
--- a/torch/utils/checkpoint.py
+++ b/torch/utils/checkpoint.py
@@ -1427,7 +1427,7 @@ def _checkpoint_without_reentrant_generator(
     new_frame.forward_completed = True

     if getattr(device_module, "_initialized", False) and \
-       preserve_rng_state and not had_device_in_fwd:
+       preserve_rng_state and not had_device_in_fwd:  # type: ignore[possibly-undefined]
         # Device was not initialized before running the forward, so we didn't
         # stash the device state.
         raise RuntimeError(
diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
index 16253140f6cc..7641ca77de21 100644
--- a/torch/utils/cpp_extension.py
+++ b/torch/utils/cpp_extension.py
@@ -2391,7 +2391,7 @@ def _write_ninja_file(path,
     # 'Blocks' should be separated by newlines, for visual benefit.
     blocks = [config, flags, compile_rule]
     if with_cuda:
-        blocks.append(cuda_compile_rule)
+        blocks.append(cuda_compile_rule)  # type: ignore[possibly-undefined]
     blocks += [devlink_rule, link_rule, build, devlink, link, default]
     content = "\n\n".join("\n".join(b) for b in blocks)
     # Ninja requires a new lines at the end of the .ninja file
diff --git a/torch/utils/data/_utils/worker.py b/torch/utils/data/_utils/worker.py
index d3dd9ac47d4c..137791c4c436 100644
--- a/torch/utils/data/_utils/worker.py
+++ b/torch/utils/data/_utils/worker.py
@@ -305,7 +305,7 @@ def _worker_loop(dataset_kind, dataset, index_queue, data_queue, done_event,
                 init_exception = None
             else:
                 try:
-                    data = fetcher.fetch(index)
+                    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
                 except Exception as e:
                     if isinstance(e, StopIteration) and dataset_kind == _DatasetKind.Iterable:
                         data = _IterableDatasetStopIteration(worker_id)
diff --git a/torch/utils/data/dataloader.py b/torch/utils/data/dataloader.py
index 3b09d094f39c..f18bb602b50d 100644
--- a/torch/utils/data/dataloader.py
+++ b/torch/utils/data/dataloader.py
@@ -1360,7 +1360,7 @@ class _MultiProcessingDataLoaderIter(_BaseDataLoaderIter):
                 # not found (i.e., didn't break)
                 return

-        self._index_queues[worker_queue_idx].put((self._send_idx, index))
+        self._index_queues[worker_queue_idx].put((self._send_idx, index))  # type: ignore[possibly-undefined]
         self._task_info[self._send_idx] = (worker_queue_idx,)
         self._tasks_outstanding += 1
         self._send_idx += 1
diff --git a/torch/utils/data/datapipes/iter/combining.py b/torch/utils/data/datapipes/iter/combining.py
index 9329e96a03b7..9a4365516a33 100644
--- a/torch/utils/data/datapipes/iter/combining.py
+++ b/torch/utils/data/datapipes/iter/combining.py
@@ -210,7 +210,7 @@ class _ForkerIterDataPipe(IterDataPipe, _ContainerTemplate):
                         raise BufferError("ForkerIterDataPipe buffer overflow," +
                                           f"buffer size {self.buffer_size} is insufficient.")

-                yield self.copy_fn(return_val)
+                yield self.copy_fn(return_val)  # type: ignore[possibly-undefined]
         finally:
             self._child_stop[instance_id] = True
             # Cleanup _datapipe_iterator for the case that fork exits earlier
diff --git a/torch/utils/tensorboard/writer.py b/torch/utils/tensorboard/writer.py
index bd8c5d188dc5..e9a1e039040f 100644
--- a/torch/utils/tensorboard/writer.py
+++ b/torch/utils/tensorboard/writer.py
@@ -907,7 +907,7 @@ class SummaryWriter:
             else:
                 # Handles cnn.CNNModelHelper, model_helper.ModelHelper
                 current_graph = model_to_graph_def(model)
-            event = event_pb2.Event(graph_def=current_graph.SerializeToString())
+            event = event_pb2.Event(graph_def=current_graph.SerializeToString())  # type: ignore[possibly-undefined]
             self._get_file_writer().add_event(event)

     @staticmethod
diff --git a/torchgen/dest/register_dispatch_key.py b/torchgen/dest/register_dispatch_key.py
index 69523855d726..114b641c5b4d 100644
--- a/torchgen/dest/register_dispatch_key.py
+++ b/torchgen/dest/register_dispatch_key.py
@@ -717,10 +717,10 @@ resize_out(out, sizes, strides, options);
             f"{textwrap.indent(class_ctor_str, indent)}",
             f"{textwrap.indent(self.gen_class_set_output_functions(k, parent_class, generate_super), indent)}",
             " const Tensor& maybe_get_output(int64_t output_idx) override {",
-            f" return {output_value};\n",
+            f" return {output_value};\n",  # type: ignore[possibly-undefined]  # TODO: audit
             " }",
-            f" std::array<{output_type}, {len(f.func.returns)}> outputs_;",
-            f"{textwrap.indent(proxy_field, indent)}",
+            f" std::array<{output_type}, {len(f.func.returns)}> outputs_;",  # type: ignore[possibly-undefined]  # TODO: audit
+            f"{textwrap.indent(proxy_field, indent)}",  # type: ignore[possibly-undefined]  # TODO: audit
             f"{textwrap.indent(guard_field, indent)}",
             "};",
         )
@@ -962,7 +962,7 @@ return {sig.name()}({', '.join(e.expr for e in translate(cpp_sig.arguments(), si
         else:
             refs = ", ".join(a.name for a in f.func.arguments.out)
             ret_expr = f"std::forward_as_tuple({refs})"
-        sig_body.append(f"return {ret_expr};")
+        sig_body.append(f"return {ret_expr};")  # type: ignore[possibly-undefined]  # TODO: audit

     sig_body_str = "\n".join(sig_body)