diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py
index 5e2743eeb4b9..5eb4e1953ee3 100644
--- a/torch/_inductor/codecache.py
+++ b/torch/_inductor/codecache.py
@@ -650,6 +650,10 @@ def cpp_flags():
     return "-std=c++17 -Wno-unused-variable"
 
 
+def cpp_wrapper_flags():
+    return "-DTORCH_INDUCTOR_CPP_WRAPPER"
+
+
 def optimization_flags():
     base_flags = "-O3 -ffast-math -fno-finite-math-only"
     if config.is_fbcode():
@@ -1138,8 +1142,10 @@ class CppWrapperCodeCache:
                         cuda=cuda,
                     )
                     _use_custom_generated_macros = use_custom_generated_macros()
+                    _cpp_wrapper_flags = cpp_wrapper_flags()
 
-                    extra_cflags = f"{_cpp_flags} {_opt_flags} {_warning_all_flag} {_macros} {_use_custom_generated_macros}"
+                    extra_cflags = f"{_cpp_flags} {_opt_flags} {_warning_all_flag} {_macros} {_cpp_wrapper_flags} \
+                        {_use_custom_generated_macros}"
                     # For CPP wrapper, add -ffast-math during linking to make CPU flush denormals.
                     # CPP wrapper leverages cpp_extension which will do the compilation and linking in two stages.
                     # We need to explicitly add -ffast-math as a linking flag.
diff --git a/torch/extension.h b/torch/extension.h
index 2ba111736b92..671ae1aadb8d 100644
--- a/torch/extension.h
+++ b/torch/extension.h
@@ -1,6 +1,9 @@
 #pragma once
 
+#ifndef TORCH_INDUCTOR_CPP_WRAPPER
 // All pure C++ headers for the C++ frontend.
 #include <torch/all.h>
+#endif
+
 // Python bindings for the C++ frontend (includes Python.h).
 #include <torch/python.h>
diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
index 70bba3ab7023..6600eb3cb540 100644
--- a/torch/utils/cpp_extension.py
+++ b/torch/utils/cpp_extension.py
@@ -1593,6 +1593,8 @@ def load_inline(name,
     if use_pch is True:
         # Using PreCompile Header('torch/extension.h') to reduce compile time.
         _check_and_build_extension_h_precompiler_headers(extra_cflags, extra_include_paths)
+    else:
+        remove_extension_h_precompiler_headers()
 
     # If `functions` is supplied, we create the pybind11 bindings for the user.
     # Here, `functions` is (or becomes, after some processing) a map from
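
For context, here is a minimal, hypothetical sketch (not part of the patch) of how the pieces fit together: Inductor's CppWrapperCodeCache builds its generated wrapper through torch.utils.cpp_extension, and the new -DTORCH_INDUCTOR_CPP_WRAPPER define makes torch/extension.h skip the heavy C++-frontend include (torch/all.h) while keeping the Python-binding headers; when a precompiled header is not requested, the new else branch removes any stale torch/extension.h PCH so a header built without the define is not reused. The extension name, the add_one source, and the explicitly passed flag below are illustrative assumptions, not code from this diff; the sketch also assumes load_inline exposes the use_pch keyword that the hunk above tests.

# Hypothetical usage sketch; mirrors what CppWrapperCodeCache does internally,
# but the names and source here are made up for illustration.
import torch
from torch.utils.cpp_extension import load_inline

cpp_source = """
#include <torch/extension.h>  // with -DTORCH_INDUCTOR_CPP_WRAPPER, torch/all.h is skipped

at::Tensor add_one(const at::Tensor& x) {
  return x + 1;
}
"""

module = load_inline(
    name="cpp_wrapper_demo",
    cpp_sources=cpp_source,
    functions=["add_one"],
    # Same define that cpp_wrapper_flags() now adds to extra_cflags in codecache.py.
    extra_cflags=["-DTORCH_INDUCTOR_CPP_WRAPPER"],
    # use_pch=False takes the new else branch, removing any stale
    # torch/extension.h precompiled header before building.
    use_pch=False,
)

print(module.add_one(torch.zeros(3)))  # tensor([1., 1., 1.])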