Files
pytorch/build.bzl
Joel Schlosser d5a6762263 Reify view_func() closures as ViewFuncs (#118404)
Replaces `view_func()` closures with a reified `ViewFunc` data structure. Codegen generates a `ViewFunc` subclass for each view op (e.g. `NarrowViewFunc`) containing state needed to reconstruct the view. The `ViewFunc` API allows for querying and hot-swapping any `SymInt`s or `Tensors` in the state through `get_symints()` / `get_tensors()` / `clone_and_set()`, which will be essential for fake-ification later on.

```cpp
/// Abstract base for reified view functions, i.e. objects able to replay a view
/// on a new base tensor. Every view op is expected to get a code-generated
/// subclass that stores whatever state is required to rebuild the view. The
/// interface also exposes the saved SymInt / tensor state, which is handy for
/// e.g. fake-ification, where symbolic values or fake tensors are swapped in.
struct TORCH_API ViewFunc {
  virtual ~ViewFunc() = default;

  /// Saved SymInts; the base implementation reports none.
  virtual std::vector<c10::SymInt> get_symints() const {
    return {};
  }

  /// Count of saved SymInts; the base implementation reports zero.
  virtual size_t num_symints() const {
    return 0;
  }

  /// Saved tensors; the base implementation reports none.
  virtual std::vector<at::Tensor> get_tensors() const {
    return {};
  }

  /// Count of saved tensors; the base implementation reports zero.
  virtual size_t num_tensors() const {
    return 0;
  }

  /// Replays the view on the supplied base using the saved state.
  virtual at::Tensor operator()(const at::Tensor&) const = 0;

  /// Produces a copy of this ViewFunc. When a vector is supplied for either
  /// argument, the copy is meant to carry that saved state instead.
  virtual std::unique_ptr<ViewFunc> clone_and_set(
      std::optional<std::vector<c10::SymInt>> = c10::nullopt,
      std::optional<std::vector<at::Tensor>> = c10::nullopt) const = 0;

 protected:
  /// Overwrites the saved SymInts. The vector length must equal the number of
  /// saved SymInts (the size of what get_symints() returns). No-op here.
  virtual void set_symints(std::vector<c10::SymInt>) {}

  /// Overwrites the saved tensors. The vector length must equal the number of
  /// saved tensors (the size of what get_tensors() returns). No-op here.
  virtual void set_tensors(std::vector<at::Tensor>) {}
};
```

New codegen files:
* `torch/csrc/autograd/generated/ViewFuncs.h`
* `torch/csrc/autograd/generated/ViewFuncs.cpp`

The templates for these also contain implementations of `ChainedViewFunc` and `ErroringViewFunc`, which are used in a few places within autograd.

Example codegen for `slice.Tensor`:
```cpp
// torch/csrc/autograd/generated/ViewFuncs.h
#define SLICE_TENSOR_VIEW_FUNC_AVAILABLE
struct SliceTensorViewFunc : public torch::autograd::ViewFunc {
  SliceTensorViewFunc(int64_t dim, c10::optional<c10::SymInt> start, c10::optional<c10::SymInt> end, c10::SymInt step) : dim(dim), start(start), end(end), step(step)
  {};
  virtual ~SliceTensorViewFunc() override {};
  virtual std::vector<c10::SymInt> get_symints() const override;
  virtual size_t num_symints() const override;
  virtual std::vector<at::Tensor> get_tensors() const override;
  virtual size_t num_tensors() const override;
  virtual at::Tensor operator()(const at::Tensor&) const override;
  virtual std::unique_ptr<ViewFunc> clone_and_set(
      std::optional<std::vector<c10::SymInt>> = c10::nullopt,
      std::optional<std::vector<at::Tensor>> = c10::nullopt) const override;

protected:
  virtual void set_symints(std::vector<c10::SymInt>) override;
  virtual void set_tensors(std::vector<at::Tensor>) override;

private:
  int64_t dim;
  c10::optional<c10::SymInt> start;
  c10::optional<c10::SymInt> end;
  c10::SymInt step;
};
...

// torch/csrc/autograd/generated/ViewFuncs.cpp
std::vector<c10::SymInt> SliceTensorViewFunc::get_symints() const {
  ::std::vector<c10::SymInt> symints;
  symints.reserve((start.has_value() ? 1 : 0) + (end.has_value() ? 1 : 0) + 1);
  if(start.has_value()) symints.insert(symints.end(), *(start));
  if(end.has_value()) symints.insert(symints.end(), *(end));
  symints.push_back(step);
  return symints;
}

// Number of saved SymInts: one for `step`, plus one for each of `start` and
// `end` that currently holds a value.
size_t SliceTensorViewFunc::num_symints() const {
  size_t count = 1;
  if (start.has_value()) {
    ++count;
  }
  if (end.has_value()) {
    ++count;
  }
  return count;
}

// Replaces the saved SymInts from `symints`, consumed in the same order that
// get_symints() emits them: start (if set), end (if set), step. An optional
// that is currently empty stays empty and consumes no element.
void SliceTensorViewFunc::set_symints(std::vector<c10::SymInt> symints) {
  TORCH_INTERNAL_ASSERT(symints.size() == num_symints());
  size_t cursor = 0;
  if (start.has_value()) {
    start = symints[cursor];
    ++cursor;
  }
  if (end.has_value()) {
    end = symints[cursor];
    ++cursor;
  }
  step = symints[cursor];
}

// This view func stores no tensors, so the returned list is always empty.
std::vector<at::Tensor> SliceTensorViewFunc::get_tensors() const {
  return ::std::vector<at::Tensor>{};
}

// Always zero: no tensors are part of the saved state.
size_t SliceTensorViewFunc::num_tensors() const {
  const size_t tensor_count = 0;
  return tensor_count;
}

// Nothing to store here; only checks that the caller passed as many tensors
// as num_tensors() reports.
void SliceTensorViewFunc::set_tensors(std::vector<at::Tensor> tensors) {
  TORCH_INTERNAL_ASSERT(tensors.size() == num_tensors());
}

// Replays the slice on `input_base` with the saved dim / start / end / step.
at::Tensor SliceTensorViewFunc::operator()(const at::Tensor& input_base) const {
  at::Tensor replayed =
      at::_ops::slice_Tensor::call(input_base, dim, start, end, step);
  return replayed;
}

// Builds a fresh SliceTensorViewFunc from the current saved state, then
// overwrites its SymInts and/or tensors when replacements were supplied.
std::unique_ptr<ViewFunc> SliceTensorViewFunc::clone_and_set(
    std::optional<std::vector<c10::SymInt>> symints,
    std::optional<std::vector<at::Tensor>> tensors) const {
  auto copy = std::make_unique<SliceTensorViewFunc>(dim, start, end, step);
  if (symints) {
    copy->set_symints(std::move(*symints));
  }
  if (tensors) {
    copy->set_tensors(std::move(*tensors));
  }
  return copy;
}
```

The `_view_func()` / `_view_func_unsafe()` methods now accept two additional (optional) args for `symint_visitor_fn` / `tensor_visitor_fn`. If these are defined, they are expected to be python callables that operate on a single SymInt / tensor and return a new one. This allows for the hot-swapping needed during fake-ification.

For testing, there are extensive pre-existing tests, and I added a test to ensure that hot-swapping functions correctly.
```sh
python test/test_autograd.py -k test_view_func_replay
python test/test_ops.py -k test_view_replay
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/118404
Approved by: https://github.com/ezyang
2024-02-09 18:51:36 +00:00

319 lines
11 KiB
Python

load(
":ufunc_defs.bzl",
"aten_ufunc_generated_cpu_kernel_sources",
"aten_ufunc_generated_cpu_sources",
"aten_ufunc_generated_cuda_sources",
)
def define_targets(rules):
    # Declares this package's targets through the supplied `rules` object,
    # which provides cc_library, cmake_configure_file, genrule, glob and
    # is_cpu_static_dispatch_build. The GENERATED_* lists referenced below are
    # defined at module level further down in this file.

    # Library that exposes the configured caffe2/core/macros.h header.
    rules.cc_library(
        name = "caffe2_core_macros",
        hdrs = [":caffe2_core_macros_h"],
    )

    # Produces caffe2/core/macros.h from the macros.h.in template;
    # `definitions` lists the macro names handed to the rule.
    rules.cmake_configure_file(
        name = "caffe2_core_macros_h",
        src = "caffe2/core/macros.h.in",
        out = "caffe2/core/macros.h",
        definitions = [
            "CAFFE2_BUILD_SHARED_LIBS",
            "CAFFE2_PERF_WITH_AVX",
            "CAFFE2_PERF_WITH_AVX2",
            "CAFFE2_PERF_WITH_AVX512",
            "CAFFE2_USE_EXCEPTION_PTR",
            "CAFFE2_USE_CUDNN",
            "USE_MKLDNN",
            "CAFFE2_USE_ITT",
            "USE_ROCM_KERNEL_ASSERT",
            "EIGEN_MPL2_ONLY",
        ],
    )

    # Serialization sources, compiled with exceptions enabled.
    rules.cc_library(
        name = "caffe2_serialize",
        srcs = [
            "caffe2/serialize/file_adapter.cc",
            "caffe2/serialize/inline_container.cc",
            "caffe2/serialize/istream_adapter.cc",
            "caffe2/serialize/read_adapter_interface.cc",
        ],
        copts = ["-fexceptions"],
        tags = [
            "-fbcode",
            "supermodule:android/default/pytorch",
            "supermodule:ios/default/public.pytorch",
            "xplat",
        ],
        visibility = ["//visibility:public"],
        deps = [
            ":caffe2_headers",
            "//c10",
            "//third_party/miniz-2.1.0:miniz",
            "@com_github_glog//:glog",
        ],
    )

    #
    # ATen generated code
    # You need to keep this in sync with the files written out
    # by gen.py (in the cmake build system, we track generated files
    # via generated_cpp.txt and generated_cpp.txt-cuda).
    #
    # Sure would be nice to use gen.py to create this list dynamically
    # instead of hardcoding, no? Well, we can't, as discussed in this
    # thread:
    # https://fb.facebook.com/groups/askbuck/permalink/1924258337622772/

    # Inputs of the ATen code generator: the two YAML files plus every template.
    gen_aten_srcs = [
        "aten/src/ATen/native/native_functions.yaml",
        "aten/src/ATen/native/tags.yaml",
    ] + rules.glob(["aten/src/ATen/templates/*"])

    # Generator command line; the static-dispatch flag is appended only when
    # rules.is_cpu_static_dispatch_build() returns a truthy value.
    gen_aten_cmd = " ".join([
        "$(execpath //torchgen:gen)",
        "--install_dir=$(RULEDIR)",
        "--source-path aten/src/ATen",
    ] + (["--static_dispatch_backend CPU"] if rules.is_cpu_static_dispatch_build() else []))

    # CUDA outputs are kept separate because gen_aten_hip declares only these.
    gen_aten_outs_cuda = (
        GENERATED_H_CUDA + GENERATED_CPP_CUDA +
        aten_ufunc_generated_cuda_sources()
    )

    # Full output list of gen_aten: CPU/core files, Declarations.yaml and the
    # CUDA outputs above.
    gen_aten_outs = (
        GENERATED_H + GENERATED_H_CORE +
        GENERATED_CPP + GENERATED_CPP_CORE +
        aten_ufunc_generated_cpu_sources() +
        aten_ufunc_generated_cpu_kernel_sources() + [
            "Declarations.yaml",
        ] + gen_aten_outs_cuda
    )

    rules.genrule(
        name = "gen_aten",
        srcs = gen_aten_srcs,
        outs = gen_aten_outs,
        cmd = gen_aten_cmd,
        tools = ["//torchgen:gen"],
    )

    # Same generator invocation with `--rocm` appended; outputs are limited to
    # the CUDA list.
    rules.genrule(
        name = "gen_aten_hip",
        srcs = gen_aten_srcs,
        outs = gen_aten_outs_cuda,
        cmd = gen_aten_cmd + " --rocm",
        features = ["-create_bazel_outputs"],
        tags = ["-bazel"],
        tools = ["//torchgen:gen"],
    )

    # Runs //tools/setup_helpers:generate_code to emit the autograd C++,
    # autograd Python-binding and testing files listed in `outs`.
    rules.genrule(
        name = "generate-code",
        srcs = [
            ":DispatchKeyNativeFunctions.cpp",
            ":DispatchKeyNativeFunctions.h",
            ":LazyIr.h",
            ":LazyNonNativeIr.h",
            ":RegisterDispatchDefinitions.ini",
            ":RegisterDispatchKey.cpp",
            ":native_functions.yaml",
            ":shape_inference.h",
            ":tags.yaml",
            ":ts_native_functions.cpp",
            ":ts_native_functions.yaml",
        ],
        outs = GENERATED_AUTOGRAD_CPP + GENERATED_AUTOGRAD_PYTHON + GENERATED_TESTING_PY,
        cmd = "$(execpath //tools/setup_helpers:generate_code) " +
              "--gen-dir=$(RULEDIR) " +
              "--native-functions-path $(location :native_functions.yaml) " +
              "--tags-path=$(location :tags.yaml) " +
              "--gen_lazy_ts_backend",
        tools = ["//tools/setup_helpers:generate_code"],
    )

    # Publicly visible library made of the generated autograd headers; each
    # header path is turned into a `:label`.
    rules.cc_library(
        name = "generated-autograd-headers",
        hdrs = [":{}".format(h) for h in _GENERATED_AUTOGRAD_CPP_HEADERS + _GENERATED_AUTOGRAD_PYTHON_HEADERS],
        visibility = ["//visibility:public"],
    )

    # Generates torch/version.h from version.h.in and version.txt.
    rules.genrule(
        name = "version_h",
        srcs = [
            ":torch/csrc/api/include/torch/version.h.in",
            ":version.txt",
        ],
        outs = ["torch/csrc/api/include/torch/version.h"],
        cmd = "$(execpath //tools/setup_helpers:gen_version_header) " +
              "--template-path $(location :torch/csrc/api/include/torch/version.h.in) " +
              "--version-path $(location :version.txt) --output-path $@ ",
        tools = ["//tools/setup_helpers:gen_version_header"],
    )
#
# ATen generated code
# You need to keep this in sync with the files written out
# by gen.py (in the cmake build system, we track generated files
# via generated_cpp.txt and generated_cpp.txt-cuda).
#
# Sure would be nice to use gen.py to create this list dynamically
# instead of hardcoding, no? Well, we can't, as discussed in this
# thread:
# https://fb.facebook.com/groups/askbuck/permalink/1924258337622772/
# Headers declared as outputs of the `gen_aten` genrule in define_targets.
GENERATED_H = [
    "Functions.h",
    "NativeFunctions.h",
    "NativeMetaFunctions.h",
    "FunctionalInverses.h",
    "RedispatchFunctions.h",
    "RegistrationDeclarations.h",
    "VmapGeneratedPlumbing.h",
]
# "Core" headers declared as outputs of the `gen_aten` genrule in
# define_targets.
GENERATED_H_CORE = [
    "Operators.h",
    # CPUFunctions.h (and likely similar headers) need to be part of core because
    # of the static dispatch build: TensorBody.h directly includes CPUFunctions.h.
    # The distinction looks pretty arbitrary though; maybe we can kill core
    # and merge the two?
    "CPUFunctions.h",
    "CPUFunctions_inl.h",
    "CompositeExplicitAutogradFunctions.h",
    "CompositeExplicitAutogradFunctions_inl.h",
    "CompositeExplicitAutogradNonFunctionalFunctions.h",
    "CompositeExplicitAutogradNonFunctionalFunctions_inl.h",
    "CompositeImplicitAutogradFunctions.h",
    "CompositeImplicitAutogradFunctions_inl.h",
    "CompositeImplicitAutogradNestedTensorFunctions.h",
    "CompositeImplicitAutogradNestedTensorFunctions_inl.h",
    "MetaFunctions.h",
    "MetaFunctions_inl.h",
    "core/TensorBody.h",
    "MethodOperators.h",
    "core/aten_interned_strings.h",
    "core/enum_tag.h",
]
# CUDA headers; part of the output list shared by the `gen_aten` and
# `gen_aten_hip` genrules in define_targets.
GENERATED_H_CUDA = [
    "CUDAFunctions.h",
    "CUDAFunctions_inl.h",
]
# CUDA registration sources; part of the output list shared by the `gen_aten`
# and `gen_aten_hip` genrules in define_targets.
GENERATED_CPP_CUDA = [
    "RegisterCUDA.cpp",
    "RegisterNestedTensorCUDA.cpp",
    "RegisterSparseCUDA.cpp",
    "RegisterSparseCsrCUDA.cpp",
    "RegisterQuantizedCUDA.cpp",
]
# Non-CUDA .cpp files declared as outputs of the `gen_aten` genrule in
# define_targets.
GENERATED_CPP = [
    "Functions.cpp",
    "RegisterBackendSelect.cpp",
    "RegisterCPU.cpp",
    "RegisterQuantizedCPU.cpp",
    "RegisterNestedTensorCPU.cpp",
    "RegisterSparseCPU.cpp",
    "RegisterSparseCsrCPU.cpp",
    "RegisterMkldnnCPU.cpp",
    "RegisterCompositeImplicitAutograd.cpp",
    "RegisterCompositeImplicitAutogradNestedTensor.cpp",
    "RegisterZeroTensor.cpp",
    "RegisterMeta.cpp",
    "RegisterQuantizedMeta.cpp",
    "RegisterNestedTensorMeta.cpp",
    "RegisterSparseMeta.cpp",
    "RegisterCompositeExplicitAutograd.cpp",
    "RegisterCompositeExplicitAutogradNonFunctional.cpp",
    "CompositeViewCopyKernels.cpp",
    "RegisterSchema.cpp",
    "RegisterFunctionalization_0.cpp",
    "RegisterFunctionalization_1.cpp",
    "RegisterFunctionalization_2.cpp",
    "RegisterFunctionalization_3.cpp",
]
# "Core" .cpp files declared as outputs of the `gen_aten` genrule in
# define_targets.
GENERATED_CPP_CORE = [
    "Operators_0.cpp",
    "Operators_1.cpp",
    "Operators_2.cpp",
    "Operators_3.cpp",
    "Operators_4.cpp",
    "core/ATenOpList.cpp",
    "core/TensorMethods.cpp",
]
# These lists are temporarily living in and exported from the shared
# structure so that an internal build that lives under a different
# root can access them. These could technically live in a separate
# file in the same directory but that would require extra work to
# ensure that file is synced to both Meta internal repositories and
# GitHub. This problem will go away when the targets downstream of
# generate-code that use these lists are moved into the shared
# structure as well.
# Python-binding autograd headers: exposed via `generated-autograd-headers`
# and included in GENERATED_AUTOGRAD_PYTHON.
_GENERATED_AUTOGRAD_PYTHON_HEADERS = [
    "torch/csrc/autograd/generated/python_functions.h",
    "torch/csrc/autograd/generated/python_return_types.h",
]
# C++ autograd headers: exposed via `generated-autograd-headers` and appended
# to GENERATED_AUTOGRAD_CPP.
_GENERATED_AUTOGRAD_CPP_HEADERS = [
    "torch/csrc/autograd/generated/Functions.h",
    "torch/csrc/autograd/generated/VariableType.h",
    "torch/csrc/autograd/generated/ViewFuncs.h",
    "torch/csrc/autograd/generated/variable_factories.h",
]
# Python testing file declared as an output of the `generate-code` genrule.
GENERATED_TESTING_PY = [
    "torch/testing/_internal/generated/annotated_fn_args.py",
]
# Lazy-tensor headers; appended to GENERATED_AUTOGRAD_CPP.
GENERATED_LAZY_H = [
    "torch/csrc/lazy/generated/LazyIr.h",
    "torch/csrc/lazy/generated/LazyNonNativeIr.h",
    "torch/csrc/lazy/generated/LazyNativeFunctions.h",
]
# Python-binding autograd sources; combined with the matching headers into
# GENERATED_AUTOGRAD_PYTHON.
_GENERATED_AUTOGRAD_PYTHON_CPP = [
    "torch/csrc/autograd/generated/python_functions_0.cpp",
    "torch/csrc/autograd/generated/python_functions_1.cpp",
    "torch/csrc/autograd/generated/python_functions_2.cpp",
    "torch/csrc/autograd/generated/python_functions_3.cpp",
    "torch/csrc/autograd/generated/python_functions_4.cpp",
    "torch/csrc/autograd/generated/python_nn_functions.cpp",
    "torch/csrc/autograd/generated/python_nested_functions.cpp",
    "torch/csrc/autograd/generated/python_fft_functions.cpp",
    "torch/csrc/autograd/generated/python_linalg_functions.cpp",
    "torch/csrc/autograd/generated/python_return_types.cpp",
    "torch/csrc/autograd/generated/python_enum_tag.cpp",
    "torch/csrc/autograd/generated/python_sparse_functions.cpp",
    "torch/csrc/autograd/generated/python_special_functions.cpp",
    "torch/csrc/autograd/generated/python_torch_functions_0.cpp",
    "torch/csrc/autograd/generated/python_torch_functions_1.cpp",
    "torch/csrc/autograd/generated/python_torch_functions_2.cpp",
    "torch/csrc/autograd/generated/python_variable_methods.cpp",
]
# All Python-binding autograd files (headers followed by sources); declared as
# outputs of the `generate-code` genrule.
GENERATED_AUTOGRAD_PYTHON = _GENERATED_AUTOGRAD_PYTHON_HEADERS + _GENERATED_AUTOGRAD_PYTHON_CPP
# C++ autograd / lazy sources followed by the C++ autograd headers and the
# lazy headers; declared as outputs of the `generate-code` genrule.
GENERATED_AUTOGRAD_CPP = [
    "torch/csrc/autograd/generated/Functions.cpp",
    "torch/csrc/autograd/generated/VariableType_0.cpp",
    "torch/csrc/autograd/generated/VariableType_1.cpp",
    "torch/csrc/autograd/generated/VariableType_2.cpp",
    "torch/csrc/autograd/generated/VariableType_3.cpp",
    "torch/csrc/autograd/generated/VariableType_4.cpp",
    "torch/csrc/autograd/generated/ViewFuncs.cpp",
    "torch/csrc/autograd/generated/TraceType_0.cpp",
    "torch/csrc/autograd/generated/TraceType_1.cpp",
    "torch/csrc/autograd/generated/TraceType_2.cpp",
    "torch/csrc/autograd/generated/TraceType_3.cpp",
    "torch/csrc/autograd/generated/TraceType_4.cpp",
    "torch/csrc/autograd/generated/ADInplaceOrViewType_0.cpp",
    "torch/csrc/autograd/generated/ADInplaceOrViewType_1.cpp",
    "torch/csrc/lazy/generated/LazyNativeFunctions.cpp",
    "torch/csrc/lazy/generated/RegisterAutogradLazy.cpp",
    "torch/csrc/lazy/generated/RegisterLazy.cpp",
] + _GENERATED_AUTOGRAD_CPP_HEADERS + GENERATED_LAZY_H