diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py
index 5e2743eeb4b9..5eb4e1953ee3 100644
--- a/torch/_inductor/codecache.py
+++ b/torch/_inductor/codecache.py
@@ -650,6 +650,10 @@ def cpp_flags():
     return "-std=c++17 -Wno-unused-variable"
 
 
+def cpp_wrapper_flags():
+    return "-DTORCH_INDUCTOR_CPP_WRAPPER"
+
+
 def optimization_flags():
     base_flags = "-O3 -ffast-math -fno-finite-math-only"
     if config.is_fbcode():
@@ -1138,8 +1142,10 @@ class CppWrapperCodeCache:
                         cuda=cuda,
                     )
                     _use_custom_generated_macros = use_custom_generated_macros()
+                    _cpp_wrapper_flags = cpp_wrapper_flags()
 
-                    extra_cflags = f"{_cpp_flags} {_opt_flags} {_warning_all_flag} {_macros} {_use_custom_generated_macros}"
+                    extra_cflags = f"{_cpp_flags} {_opt_flags} {_warning_all_flag} {_macros} {_cpp_wrapper_flags} \
+                        {_use_custom_generated_macros}"
                     # For CPP wrapper, add -ffast-math during linking to make CPU flush denormals.
                     # CPP wrapper leverages cpp_extension which will do the compilation and linking in two stages.
                     # We need to explicitly add -ffast-math as a linking flag.
diff --git a/torch/extension.h b/torch/extension.h
index 2ba111736b92..671ae1aadb8d 100644
--- a/torch/extension.h
+++ b/torch/extension.h
@@ -1,6 +1,9 @@
 #pragma once
 
+#ifndef TORCH_INDUCTOR_CPP_WRAPPER
 // All pure C++ headers for the C++ frontend.
 #include <torch/all.h>
+#endif
+
 // Python bindings for the C++ frontend (includes Python.h).
 #include <torch/python.h>
diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
index 70bba3ab7023..6600eb3cb540 100644
--- a/torch/utils/cpp_extension.py
+++ b/torch/utils/cpp_extension.py
@@ -1593,6 +1593,8 @@ def load_inline(name,
     if use_pch is True:
         # Using PreCompile Header('torch/extension.h') to reduce compile time.
         _check_and_build_extension_h_precompiler_headers(extra_cflags, extra_include_paths)
+    else:
+        remove_extension_h_precompiler_headers()
 
     # If `functions` is supplied, we create the pybind11 bindings for the user.
     # Here, `functions` is (or becomes, after some processing) a map from
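
For context, here is a minimal, hypothetical sketch (not part of the patch) of how the pieces fit together: Inductor's CppWrapperCodeCache builds its generated wrapper through torch.utils.cpp_extension, and the new -DTORCH_INDUCTOR_CPP_WRAPPER define makes torch/extension.h skip the heavy C++-frontend include (torch/all.h) while keeping the Python-binding headers; when a precompiled header is not requested, the new else branch removes any stale torch/extension.h PCH so a header built without the define is not reused. The extension name, the add_one source, and the explicitly passed flag below are illustrative assumptions, not code from this diff; the sketch also assumes load_inline exposes the use_pch keyword that the hunk above tests.

# Hypothetical usage sketch; mirrors what CppWrapperCodeCache does internally,
# but the names and source here are made up for illustration.
import torch
from torch.utils.cpp_extension import load_inline

cpp_source = """
#include <torch/extension.h>  // with -DTORCH_INDUCTOR_CPP_WRAPPER, torch/all.h is skipped

at::Tensor add_one(const at::Tensor& x) {
  return x + 1;
}
"""

module = load_inline(
    name="cpp_wrapper_demo",
    cpp_sources=cpp_source,
    functions=["add_one"],
    # Same define that cpp_wrapper_flags() now adds to extra_cflags in codecache.py.
    extra_cflags=["-DTORCH_INDUCTOR_CPP_WRAPPER"],
    # use_pch=False takes the new else branch, removing any stale
    # torch/extension.h precompiled header before building.
    use_pch=False,
)

print(module.add_one(torch.zeros(3)))  # tensor([1., 1., 1.])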