diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 53cbfd09ebba..893715750e53 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -14718,25 +14718,29 @@
 # Collectives
 - func: all_reduce(Tensor self, str reduceOp, str tag, int[] ranks, int group_size) -> Tensor
-  python_module: dist
+  # This should be changed to distributed but it requires changes all over the place to work
+  python_module: nn
   dispatch:
     CompositeExplicitAutograd: all_reduce
   variants: function
 
 - func: all_gather_into_tensor(Tensor shard, str tag, int[] ranks, int group_size) -> Tensor
-  python_module: dist
+  # This should be changed to distributed but it requires changes all over the place to work
+  python_module: nn
   dispatch:
     CompositeExplicitAutograd: all_gather_into_tensor
   variants: function
 
 - func: reduce_scatter_tensor(Tensor input, str reduceOp, int scatter_dim, str tag, int[] ranks, int group_size) -> Tensor
-  python_module: dist
+  # This should be changed to distributed but it requires changes all over the place to work
+  python_module: nn
   dispatch:
     CompositeExplicitAutograd: reduce_scatter_tensor
   variants: function
 
 - func: wait_tensor(Tensor self) -> Tensor
-  python_module: dist
+  # This should be changed to distributed but it requires changes all over the place to work
+  python_module: nn
   dispatch:
     CompositeExplicitAutograd: wait_tensor
diff --git a/build.bzl b/build.bzl
index 613e0727d935..6d1bcf7a0601 100644
--- a/build.bzl
+++ b/build.bzl
@@ -260,7 +260,6 @@ _GENERATED_AUTOGRAD_PYTHON_CPP = [
     "torch/csrc/autograd/generated/python_nested_functions.cpp",
     "torch/csrc/autograd/generated/python_fft_functions.cpp",
     "torch/csrc/autograd/generated/python_linalg_functions.cpp",
-    "torch/csrc/autograd/generated/python_dist_functions.cpp",
     "torch/csrc/autograd/generated/python_return_types.cpp",
     "torch/csrc/autograd/generated/python_enum_tag.cpp",
     "torch/csrc/autograd/generated/python_sparse_functions.cpp",
diff --git a/build_variables.bzl b/build_variables.bzl
index 2d5ba1eaf11e..0f2ee809b586 100644
--- a/build_variables.bzl
+++ b/build_variables.bzl
@@ -930,7 +930,6 @@ def glob_libtorch_python_sources(gencode_pattern = ":generate-code[{}]"):
         "torch/csrc/autograd/generated/python_nn_functions.cpp",
         "torch/csrc/autograd/generated/python_fft_functions.cpp",
         "torch/csrc/autograd/generated/python_linalg_functions.cpp",
-        "torch/csrc/autograd/generated/python_dist_functions.cpp",
         "torch/csrc/autograd/generated/python_enum_tag.cpp",
         "torch/csrc/autograd/generated/python_return_types.cpp",
         "torch/csrc/autograd/generated/python_sparse_functions.cpp",
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 3847a4061471..1c2539b920dd 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -394,7 +394,6 @@ set(GENERATED_CXX_PYTHON
     "${TORCH_SRC_DIR}/csrc/autograd/generated/python_nested_functions.cpp"
     "${TORCH_SRC_DIR}/csrc/autograd/generated/python_sparse_functions.cpp"
     "${TORCH_SRC_DIR}/csrc/autograd/generated/python_special_functions.cpp"
-    "${TORCH_SRC_DIR}/csrc/autograd/generated/python_dist_functions.cpp"
     "${TORCH_SRC_DIR}/csrc/autograd/generated/python_return_types.cpp"
     "${TORCH_SRC_DIR}/csrc/autograd/generated/python_enum_tag.cpp"
     )
diff --git a/pt_template_srcs.bzl b/pt_template_srcs.bzl
index 4caaea191a49..feb14818700f 100644
--- a/pt_template_srcs.bzl
+++ b/pt_template_srcs.bzl
@@ -149,7 +149,6 @@ def get_generate_code_bin_outs():
         "autograd/generated/python_return_types.cpp": ["autograd/generated/python_return_types.cpp"],
         "autograd/generated/python_sparse_functions.cpp": ["autograd/generated/python_sparse_functions.cpp"],
         "autograd/generated/python_special_functions.cpp": ["autograd/generated/python_special_functions.cpp"],
-        "autograd/generated/python_dist_functions.cpp": ["autograd/generated/python_dist_functions.cpp"],
         "autograd/generated/python_torch_functions_0.cpp": ["autograd/generated/python_torch_functions_0.cpp"],
         "autograd/generated/python_torch_functions_1.cpp": ["autograd/generated/python_torch_functions_1.cpp"],
         "autograd/generated/python_torch_functions_2.cpp": ["autograd/generated/python_torch_functions_2.cpp"],
diff --git a/tools/BUCK.bzl b/tools/BUCK.bzl
index 2e4a668dfa61..25c85afb3f88 100644
--- a/tools/BUCK.bzl
+++ b/tools/BUCK.bzl
@@ -135,7 +135,6 @@ def define_tools_targets(
             "autograd/templates/python_return_types.cpp",
             "autograd/templates/python_sparse_functions.cpp",
             "autograd/templates/python_special_functions.cpp",
-            "autograd/templates/python_dist_functions.cpp",
             "autograd/templates/python_torch_functions.cpp",
             "autograd/templates/python_variable_methods.cpp",
             "autograd/templates/variable_factories.h",
diff --git a/tools/autograd/gen_python_functions.py b/tools/autograd/gen_python_functions.py
index 7dca30613f5d..795df8c2ec44 100644
--- a/tools/autograd/gen_python_functions.py
+++ b/tools/autograd/gen_python_functions.py
@@ -239,10 +239,6 @@ def is_py_special_function(f: NativeFunction) -> bool:
     return f.python_module == "special"
 
 
-def is_py_dist_function(f: NativeFunction) -> bool:
-    return f.python_module == "dist"
-
-
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 #
 #                            Main Function
@@ -349,15 +345,6 @@ def gen(
         symint=symint,
     )
 
-    create_python_bindings(
-        fm,
-        functions,
-        is_py_dist_function,
-        "torch.distributed.functional",
-        "python_dist_functions.cpp",
-        method=False,
-    )
-
     # Currently, we only use `functions` to generate `return_types` bindings.
     # All methods which return namedtuple have function variant at this point.
     # If any method only operator with namedtuple is added in the future,
@@ -915,7 +902,6 @@ if(check_has_torch_function(self_)) {{
         "torch.nested": "THPNestedVariableFunctionsModule",
         "torch.sparse": "THPSparseVariableFunctionsModule",
         "torch.special": "THPSpecialVariableFunctionsModule",
-        "torch.distributed.functional": "THPDistVariableFunctionsModule",
     }[module]
     if module
     else "THPVariableClass"
diff --git a/tools/autograd/templates/python_dist_functions.cpp b/tools/autograd/templates/python_dist_functions.cpp
deleted file mode 100644
index 416ac5b2c7c5..000000000000
--- a/tools/autograd/templates/python_dist_functions.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
-// ${generated_comment}
-
-#include "torch/csrc/Device.h"
-#include "torch/csrc/DynamicTypes.h"
-#include "torch/csrc/Exceptions.h"
-#include "torch/csrc/autograd/python_dist_functions.h"
-#include "torch/csrc/autograd/python_return_types.h"
-#include "torch/csrc/autograd/python_variable.h"
-#include "torch/csrc/autograd/utils/wrap_outputs.h"
-#include "torch/csrc/autograd/utils/python_arg_parsing.h"
-#include "torch/csrc/utils/pycfunction_helpers.h"
-#include "torch/csrc/utils/python_arg_parser.h"
-#include "torch/csrc/utils/structseq.h"
-#include "torch/csrc/utils/tensor_memoryformats.h"
-
-#ifndef AT_PER_OPERATOR_HEADERS
-#include
-#else
-$ops_headers
-#endif
-
-using at::Tensor;
-using at::Scalar;
-using at::MemoryFormat;
-using at::Generator;
-using at::IntArrayRef;
-using at::ArrayRef;
-
-using namespace torch::autograd::utils;
-
-namespace torch { namespace autograd {
-
-// generated forward declarations start here
-
-${py_forwards}
-
-static PyMethodDef dist_functions[] = {
-  ${py_method_defs}
-  {NULL}
-};
-
-static PyObject* THPDistVariableFunctionsModule = NULL;
-
-void initDistFunctions(PyObject* module) {
-  static struct PyModuleDef def = {
-      PyModuleDef_HEAD_INIT,
-      "torch._C._dist",
-      NULL,
-      -1,
-      dist_functions
-  };
-  PyObject* dist = PyModule_Create(&def);
-  THPDistVariableFunctionsModule = dist;
-  if (!dist) {
-    throw python_error();
-  }
-  // steals a reference to dist
-  if (PyModule_AddObject(module, "_dist", dist) != 0) {
-    throw python_error();
-  }
-}
-
-// generated methods start here
-
-${py_methods}
-
-}} // namespace torch::autograd
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
index c9ff2de5879f..97ad819a67c6 100644
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@@ -41,7 +41,6 @@
 #include
 #include
 #include
-#include <torch/csrc/autograd/python_dist_functions.h>
 #include
 #include
 #include
@@ -1329,7 +1328,6 @@ PyObject* initModule() {
   torch::autograd::initNestedFunctions(module);
   torch::autograd::initSparseFunctions(module);
   torch::autograd::initSpecialFunctions(module);
-  torch::autograd::initDistFunctions(module);
   torch::autograd::init_legacy_variable(module);
   torch::profiler::initPythonBindings(module);
   torch::python::init_bindings(module);
diff --git a/torch/csrc/autograd/python_dist_functions.h b/torch/csrc/autograd/python_dist_functions.h
deleted file mode 100644
index f5f937f51976..000000000000
--- a/torch/csrc/autograd/python_dist_functions.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#pragma once
-
-namespace torch {
-namespace autograd {
-
-void initDistFunctions(PyObject* module);
-
-} // namespace autograd
-} // namespace torch
diff --git a/torch/distributed/_functional_collectives.py b/torch/distributed/_functional_collectives.py
index 952a33258d34..d53b47ed939e 100644
--- a/torch/distributed/_functional_collectives.py
+++ b/torch/distributed/_functional_collectives.py
@@ -105,7 +105,7 @@ class AsyncCollectiveTensor(torch.Tensor):
     Use it inside functional collective pytorch wrappers like the following:
     def functional_collective(self, group, tag):
         tag, rankset, group_size = _expand_group(group, tag)
-        tensor = torch._C._dist.{collective}(self, tag, rankset, group_size)
+        tensor = torch._C._nn.{collective}(self, tag, rankset, group_size)
         res = AsyncCollectiveTensor(tensor)
         _register_wrapper_tensor(res, tensor)
         return res
@@ -254,7 +254,7 @@ def wait_tensor(tensor):
 
     Waiting follows device semantics, which means blocking on CPU and synchronizing streams on CUDA.
     """
-    return torch._C._dist.wait_tensor(tensor)  # type: ignore[attr-defined]
+    return torch._C._nn.wait_tensor(tensor)  # type: ignore[attr-defined]
 
 
 def all_reduce(self: torch.Tensor, reduceOp: str, group: RANK_TYPES, tag: str = ""):
@@ -275,7 +275,7 @@ def all_reduce(self: torch.Tensor, reduceOp: str, group: RANK_TYPES, tag: str =
     that information and perform collective algebraic optimization. Use other forms of input for that.
     """
     tag, rankset, group_size = _expand_group(group, tag)
-    tensor = torch._C._dist.all_reduce(self, reduceOp, tag, rankset, group_size)  # type: ignore[attr-defined]
+    tensor = torch._C._nn.all_reduce(self, reduceOp, tag, rankset, group_size)  # type: ignore[attr-defined]
     res = AsyncCollectiveTensor(tensor)
     _register_wrapper_tensor(res, tensor)
     return res
@@ -307,9 +307,7 @@ def reduce_scatter_tensor(
     assert (
         self.size(0) % group_size == 0
     ), f"input dimension 0 ({self.size(0)} must be a multiple of group_size {group_size}"
-    tensor = torch._C._dist.reduce_scatter_tensor(  # type: ignore[attr-defined]
-        self, reduceOp, scatter_dim, tag, rankset, group_size
-    )
+    tensor = torch._C._nn.reduce_scatter_tensor(self, reduceOp, scatter_dim, tag, rankset, group_size)  # type: ignore[attr-defined]
     res = AsyncCollectiveTensor(tensor)
     _register_wrapper_tensor(res, tensor)
     return res
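
Usage sketch: after this change the functional collective ops are generated under `python_module: nn`, so the Python wrappers reach them as `torch._C._nn.*` rather than `torch._C._dist.*`. The sketch below mirrors the wrapper pattern quoted in the `AsyncCollectiveTensor` docstring; the helper name, the `"sum"` reduce-op string, and the assumption of an initialized default process group with an explicit rank list are illustrative and not taken from the diff.

```python
# Minimal sketch, assuming torch.distributed is initialized and `ranks` matches
# the default process group; the helper below is illustrative, not an API
# introduced by this diff.
import torch
import torch.distributed._functional_collectives as funcol


def allreduce_sum(t: torch.Tensor, ranks: list, tag: str = "") -> torch.Tensor:
    # all_reduce expands (ranks, tag) via _expand_group and calls
    # torch._C._nn.all_reduce(t, "sum", tag, rankset, group_size), wrapping the
    # result in an AsyncCollectiveTensor (see the docstring hunk above).
    reduced = funcol.all_reduce(t, "sum", ranks, tag)
    # wait_tensor forwards to torch._C._nn.wait_tensor: it blocks on CPU and
    # synchronizes streams on CUDA, per the wait_tensor docstring in the diff.
    return funcol.wait_tensor(reduced)
```

The explicit wait is what resolves the asynchronous collective before the result is used outside the functional wrappers.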