Make lazy codegen honor per-operator-headers flag (#74450)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/74450

per-operator-headers is a strict build mode where compilation units aren't allowed to depend on bulk headers like ATen/Functions.h, but must instead depend only on the specific operator headers they use. (In other configurations, the reverse is required.)

Test Plan: CI to make sure nothing breaks for existing backends, plus a manual test on the rebased next diff to make sure it actually helps.

Reviewed By: ezyang, bdhirsh

Differential Revision: D35002666

fbshipit-source-id: 712445f8d146cf026759444fbd42a20705be9bef
(cherry picked from commit f13e5522d49a6edcb6aed4431b1ec8e2b50a98fc)
Committed by: PyTorch MergeBot
Parent: 45da320092
Commit: 93f7f58856
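To make the two header modes concrete before the diffs: a minimal, self-contained sketch of the choice the generator has to make. This is illustrative only, not actual torchgen code; `emit_headers` and `ops_used` are made-up names.

    from typing import List

    def emit_headers(ops_used: List[str], per_operator_headers: bool) -> List[str]:
        if per_operator_headers:
            # Strict mode: include only the operator headers actually used.
            return [f"#include <ATen/ops/{op}.h>" for op in ops_used]
        # Default mode: one bulk header declares every operator.
        return ["#include <ATen/Functions.h>"]

    print(emit_headers(["empty", "empty_strided"], True))
    # ['#include <ATen/ops/empty.h>', '#include <ATen/ops/empty_strided.h>']
    print(emit_headers(["empty", "empty_strided"], False))
    # ['#include <ATen/Functions.h>']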
@@ -399,6 +399,11 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
       ${GENERATED_TESTING_PYTHON}
   )
 
+  set(GEN_PER_OPERATOR_FLAG)
+  if(USE_PER_OPERATOR_HEADERS)
+    list(APPEND GEN_PER_OPERATOR_FLAG "--per_operator_headers")
+  endif()
+
   add_custom_command(
     OUTPUT
       ${TORCH_GENERATED_CODE}
@@ -410,6 +415,7 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
       $<$<BOOL:${SELECTED_OP_LIST}>:--selected-op-list-path="${SELECTED_OP_LIST}">
       --force_schema_registration
       --gen_lazy_ts_backend
+      ${GEN_PER_OPERATOR_FLAG}
     DEPENDS
       "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
       "${TORCH_ROOT}/aten/src/ATen/native/ts_native_functions.yaml"
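When USE_PER_OPERATOR_HEADERS is off, GEN_PER_OPERATOR_FLAG stays an empty list and expands to nothing in the custom command, so the generator keeps its default bulk-header behavior. A rough sketch of that expansion, written in Python for illustration; the script path and argument list are assumptions, not the exact command CMake builds.

    use_per_operator_headers = True  # mirrors -DUSE_PER_OPERATOR_HEADERS=ON

    gen_per_operator_flag = []       # mirrors set(GEN_PER_OPERATOR_FLAG)
    if use_per_operator_headers:
        gen_per_operator_flag.append("--per_operator_headers")

    cmd = [
        "python", "tools/setup_helpers/generate_code.py",  # assumed path
        "--force_schema_registration",
        "--gen_lazy_ts_backend",
        *gen_per_operator_flag,  # an empty list expands to nothing, as in CMake
    ]
    print(" ".join(cmd))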
@@ -43,7 +43,9 @@ def gen_registration_headers(
     elif per_operator_headers:
         headers += [
             "#include <ATen/ops/empty.h>",
-            "#include <ATen/ops/empty_strided.h>"]
+            "#include <ATen/ops/empty_strided.h>",
+            "#include <ATen/ops/_copy_from_and_resize.h>",
+            "#include <ATen/ops/_copy_from.h>"]
     else:
         headers.append("#include <ATen/Functions.h>")
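The two new includes suggest the generated registrations can call at::_copy_from and at::_copy_from_and_resize; in strict mode, every such call must be matched by its own ATen/ops/<op>.h include. A trivial sketch of that naming convention; `op_header` is an illustrative helper, not part of torchgen.

    def op_header(op: str) -> str:
        # Per-operator headers follow ATen/ops/<operator name>.h,
        # including for underscore-prefixed operators.
        return f"#include <ATen/ops/{op}.h>"

    assert op_header("_copy_from") == "#include <ATen/ops/_copy_from.h>"
    assert op_header("_copy_from_and_resize") == "#include <ATen/ops/_copy_from_and_resize.h>"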
@@ -241,15 +241,16 @@ def gen_dispatcher_registrations(
                                  grouped_native_functions: Sequence[Union[NativeFunction, NativeFunctionsGroup]],
                                  backend_dispatch_key: DispatchKey,
                                  dispatch_key: DispatchKey,
-                                 selector: 'SelectiveBuilder') -> None:
+                                 selector: 'SelectiveBuilder',
+                                 per_operator_headers: bool = False) -> None:
     backend_index = backend_indices[dispatch_key]
     fm.write_with_template(f'Register{dispatch_key}.cpp', 'RegisterDispatchKey.cpp', lambda: {
         'extra_cuda_headers': '',
         'external_backend_headers': f'#include "{output_dir}/{backend_dispatch_key}NativeFunctions.h"',
-        'ops_headers': '#include <ATen/Functions.h>',
+        'ops_headers': '#include <ATen/Functions.h>' if not per_operator_headers else '',
         'DispatchKey': dispatch_key,
         'dispatch_namespace': dispatch_key.lower(),
-        'dispatch_headers': dest.gen_registration_headers(backend_index, per_operator_headers=False, rocm=False),
+        'dispatch_headers': dest.gen_registration_headers(backend_index, per_operator_headers=per_operator_headers, rocm=False),
         'dispatch_helpers': dest.gen_registration_helpers(backend_index),
         'dispatch_namespaced_definitions': '',
         'dispatch_anonymous_definitions': list(concatMap(
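The 'ops_headers' substitution is now mode-dependent: the bulk include is emitted only when per-operator headers are off, since in strict mode gen_registration_headers supplies the specific includes instead. That one expression, restated as a standalone sketch:

    def ops_headers(per_operator_headers: bool) -> str:
        # Mirrors the template substitution in the hunk above.
        return '#include <ATen/Functions.h>' if not per_operator_headers else ''

    assert ops_headers(False) == '#include <ATen/Functions.h>'
    assert ops_headers(True) == ''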
@@ -120,7 +120,8 @@ def run_gen_lazy_tensor(aten_path: str, source_yaml: str, output_dir: str,
                         tensor_class: str = default_args.tensor_class,
                         tensor_class_hdr: str = default_args.tensor_class_hdr,
                         shape_inference_hdr: str = default_args.shape_inference_hdr,
-                        lazy_ir_cls: Type[LazyIR] = default_args.lazy_ir_cls) -> None:
+                        lazy_ir_cls: Type[LazyIR] = default_args.lazy_ir_cls,
+                        per_operator_headers: bool = False) -> None:
 
     template_dir = os.path.join(aten_path, "templates")

@@ -224,7 +225,8 @@ def run_gen_lazy_tensor(aten_path: str, source_yaml: str, output_dir: str,
     # Generate Dispatcher registrations which hook up the nativefunctions
     for dispatch_key in [backend_key] if autograd_key is None else [backend_key, autograd_key]:
         gen_dispatcher_registrations(fm, output_dir, cpp_namespace, backend_indices, grouped_native_functions,
-                                     backend_key, dispatch_key, selector)
+                                     backend_key, dispatch_key, selector,
+                                     per_operator_headers=per_operator_headers)
 
     # Generate native function impls that build IR nodes
     ns_helper = NamespaceHelper(cpp_namespace)

@@ -167,6 +167,11 @@ def main() -> None:
         action='store_true',
         help='Enable generation of the torch::lazy TorchScript backend'
     )
+    parser.add_argument(
+        '--per_operator_headers',
+        action='store_true',
+        help='Build lazy tensor ts backend with per-operator ATen headers, must match how ATen was built'
+    )
     options = parser.parse_args()
 
     generate_code(
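Because the flag uses action='store_true', it defaults to False, so callers that never pass it keep the old bulk-header behavior. A quick self-contained check of that parsing behavior, mirroring the parser definition above:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--per_operator_headers', action='store_true')

    # Omitting the flag leaves the default; passing it flips it to True.
    assert parser.parse_args([]).per_operator_headers is False
    assert parser.parse_args(['--per_operator_headers']).per_operator_headers is True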
@@ -201,7 +206,8 @@ def main() -> None:
         impl_path=None,
         gen_ts_lowerings=True,
         node_base="TsNode",
-        node_base_hdr="torch/csrc/lazy/ts_backend/ts_node.h")
+        node_base_hdr="torch/csrc/lazy/ts_backend/ts_node.h",
+        per_operator_headers=options.per_operator_headers)
 
 
 if __name__ == "__main__":