From 1586521461c8dc642735466fc143b7d366a858d0 Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot
Date: Tue, 1 Jul 2025 11:23:00 +0000
Subject: [PATCH] Revert "Compute contiguity symbolically to avoid dde, and introduce c++ sym_is_contiguous (#155590)"

This reverts commit 2c76f31221e117b217b8a6a96a5405f626d2218a.

Reverted https://github.com/pytorch/pytorch/pull/155590 on behalf of https://github.com/jeanschmidt due to breaking 1000s of internal builds; it can't be properly landed internally, so there are no options except revert and codev. ([comment](https://github.com/pytorch/pytorch/pull/155590#issuecomment-3023503929))
---
 .github/ci_commit_pins/xla.txt                |   2 +-
 aten/src/ATen/FunctionalTensorWrapper.cpp     |   4 +-
 aten/src/ATen/FunctionalTensorWrapper.h       |   3 +-
 aten/src/ATen/FunctionalizeFallbackKernel.cpp |  11 +-
 aten/src/ATen/LegacyBatchedTensorImpl.cpp     |   2 +-
 aten/src/ATen/LegacyBatchedTensorImpl.h       |   3 +-
 aten/src/ATen/MemoryOverlap.cpp               |   4 +-
 aten/src/ATen/NestedTensorImpl.cpp            |   2 +-
 aten/src/ATen/NestedTensorImpl.h              |   2 +-
 aten/src/ATen/SparseCsrTensorImpl.cpp         |   3 +-
 aten/src/ATen/SparseCsrTensorImpl.h           |   2 +-
 aten/src/ATen/core/TensorBase.h               |  21 +---
 aten/src/ATen/functorch/BatchedTensorImpl.cpp |   2 +-
 aten/src/ATen/functorch/BatchedTensorImpl.h   |   2 +-
 aten/src/ATen/native/Linear.cpp               |   2 +-
 aten/src/ATen/native/TensorProperties.cpp     |   2 +-
 aten/src/ATen/native/TensorShape.cpp          |   9 +-
 aten/src/ATen/native/metal/MetalTensorImpl.h  |   2 +-
 .../ATen/native/transformers/attention.cpp    |   2 +-
 .../native/vulkan/VulkanOpaqueTensorImpl.h    |   3 +-
 c10/core/Contiguity.h                         |  77 +++---------
 c10/core/SymbolicShapeMeta.cpp                |  66 ++--------
 c10/core/SymbolicShapeMeta.h                  |  11 --
 c10/core/TensorImpl.cpp                       |  10 +-
 c10/core/TensorImpl.h                         | 118 ++++++------------
 c10/core/UndefinedTensorImpl.cpp              |   3 +-
 c10/core/UndefinedTensorImpl.h                |   2 +-
 test/export/test_export.py                    |  42 -------
 test/test_dynamic_shapes.py                   |  73 +----------
 test/test_proxy_tensor.py                     |   4 +-
 .../templates/python_variable_methods.cpp     |   2 +-
 torch/csrc/lazy/core/tensor_impl.cpp          |   5 +-
 torch/csrc/lazy/core/tensor_impl.h            |   5 +-
 torch/utils/_sympy/printers.py                |   3 -
 34 files changed, 114 insertions(+), 390 deletions(-)

diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt
index dfbc78d8884c..4ee628b53e22 100644
--- a/.github/ci_commit_pins/xla.txt
+++ b/.github/ci_commit_pins/xla.txt
@@ -1 +1 @@
-926700d7832caa552ba2e1fc8302f6a2f4d2f6d8
+55a75404c9b75cd5fd62ab5d4deafc8c506b3af2
diff --git a/aten/src/ATen/FunctionalTensorWrapper.cpp b/aten/src/ATen/FunctionalTensorWrapper.cpp
index c3ca626bc9e0..0c5df695fa96 100644
--- a/aten/src/ATen/FunctionalTensorWrapper.cpp
+++ b/aten/src/ATen/FunctionalTensorWrapper.cpp
@@ -499,8 +499,8 @@ int64_t FunctionalTensorWrapper::dim_custom() const {
 int64_t FunctionalTensorWrapper::numel_custom() const {
   return value_.unsafeGetTensorImpl()->numel();
 }
-c10::SymBool FunctionalTensorWrapper::sym_is_contiguous_custom(at::MemoryFormat memory_format) const {
-  return value_.unsafeGetTensorImpl()->sym_is_contiguous(memory_format);
+bool FunctionalTensorWrapper::is_contiguous_custom(at::MemoryFormat memory_format) const {
+  return value_.unsafeGetTensorImpl()->is_contiguous(memory_format);
 }
 c10::SymIntArrayRef FunctionalTensorWrapper::sym_sizes_custom() const {
   return value_.unsafeGetTensorImpl()->sym_sizes();
diff --git a/aten/src/ATen/FunctionalTensorWrapper.h b/aten/src/ATen/FunctionalTensorWrapper.h
index bec2d463196c..3dcc82bdd04d 100644
--- a/aten/src/ATen/FunctionalTensorWrapper.h
+++
b/aten/src/ATen/FunctionalTensorWrapper.h @@ -236,8 +236,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl { at::IntArrayRef strides_custom() const override; int64_t dim_custom() const override; int64_t numel_custom() const override; - c10::SymBool sym_is_contiguous_custom( - at::MemoryFormat memory_format) const override; + bool is_contiguous_custom(at::MemoryFormat memory_format) const override; c10::SymIntArrayRef sym_sizes_custom() const override; c10::SymInt sym_size_custom(int64_t d) const override; c10::SymIntArrayRef sym_strides_custom() const override; diff --git a/aten/src/ATen/FunctionalizeFallbackKernel.cpp b/aten/src/ATen/FunctionalizeFallbackKernel.cpp index 97094c9f125a..bc2170b7ba09 100644 --- a/aten/src/ATen/FunctionalizeFallbackKernel.cpp +++ b/aten/src/ATen/FunctionalizeFallbackKernel.cpp @@ -320,9 +320,11 @@ static at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymInt auto stride = at::detail::computeStride(self.sym_sizes(), self.sym_strides(), inferred_size); if (!stride.has_value()) { - - TORCH_SYM_CHECK( - self.sym_is_contiguous(), + // With unbacked symints, computeStride could fail even on contiguous + // tensors. In this case, we can use the strides of an empty tensor of + // inferred_size. + TORCH_CHECK( + self.is_contiguous(), "View is not valid from size:", self.sym_sizes(), " stride: ", @@ -331,9 +333,6 @@ static at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymInt inferred_size, " in case of unbacked symbols consider adding torch.check to guide computing strides."); - // With unbacked symints, computeStride could fail even on contiguous - // tensors. In this case, we can use the strides of an empty tensor of - // inferred_size. stride = at::detail::empty_symint_meta( inferred_size, std::nullopt, diff --git a/aten/src/ATen/LegacyBatchedTensorImpl.cpp b/aten/src/ATen/LegacyBatchedTensorImpl.cpp index cceefe985a7e..12c562f5d8e1 100644 --- a/aten/src/ATen/LegacyBatchedTensorImpl.cpp +++ b/aten/src/ATen/LegacyBatchedTensorImpl.cpp @@ -84,7 +84,7 @@ IntArrayRef BatchedTensorImpl::strides_custom() const { // TODO: implement proper contiguity on batched tensor, then put // sizes_strides_policy back to Default -c10::SymBool BatchedTensorImpl::sym_is_contiguous_custom(at::MemoryFormat memory_format) const { +bool BatchedTensorImpl::is_contiguous_custom(at::MemoryFormat memory_format) const { TORCH_CHECK(memory_format == MemoryFormat::Contiguous, "NYI: querying is_contiguous inside of vmap for memory_format ", "other than torch.contiguous_format"); diff --git a/aten/src/ATen/LegacyBatchedTensorImpl.h b/aten/src/ATen/LegacyBatchedTensorImpl.h index 798e3535af3f..fa6c472e1fa0 100644 --- a/aten/src/ATen/LegacyBatchedTensorImpl.h +++ b/aten/src/ATen/LegacyBatchedTensorImpl.h @@ -82,8 +82,7 @@ struct TORCH_API BatchedTensorImpl : public c10::TensorImpl { IntArrayRef strides_custom() const override; // Override a bunch of methods inherited from TensorImpl to return error // messages. 
- c10::SymBool sym_is_contiguous_custom( - at::MemoryFormat memory_format) const override; + bool is_contiguous_custom(at::MemoryFormat memory_format) const override; void set_size(int64_t dim, int64_t new_size) override; void set_stride(int64_t dim, int64_t new_stride) override; void set_storage_offset(int64_t storage_offset) override; diff --git a/aten/src/ATen/MemoryOverlap.cpp b/aten/src/ATen/MemoryOverlap.cpp index 1bc8c30158ae..5e165d3064a3 100644 --- a/aten/src/ATen/MemoryOverlap.cpp +++ b/aten/src/ATen/MemoryOverlap.cpp @@ -24,7 +24,7 @@ MemOverlap has_internal_overlap(TensorImpl* t) { } } - if (t->is_non_overlapping_and_dense_or_false()) { + if (t->is_non_overlapping_and_dense()) { return MemOverlap::No; } @@ -63,7 +63,7 @@ MemOverlapStatus get_overlap_status(const TensorImpl* a, const TensorImpl* b) { if (a->numel() == 0 || b->numel() == 0) { return MemOverlapStatus::No; } - if (!a->is_non_overlapping_and_dense_or_false() || !b->is_non_overlapping_and_dense_or_false()) { + if (!a->is_non_overlapping_and_dense() || !b->is_non_overlapping_and_dense()) { return MemOverlapStatus::TooHard; } // Test for storage equality, rather than pointer equality. diff --git a/aten/src/ATen/NestedTensorImpl.cpp b/aten/src/ATen/NestedTensorImpl.cpp index 647b2f1685d1..b35a7ef9401f 100644 --- a/aten/src/ATen/NestedTensorImpl.cpp +++ b/aten/src/ATen/NestedTensorImpl.cpp @@ -273,7 +273,7 @@ c10::SymInt NestedTensorImpl::sym_numel_custom() const { return NestedTensorImpl::numel_custom(); } -c10::SymBool NestedTensorImpl::sym_is_contiguous_custom(MemoryFormat) const { +bool NestedTensorImpl::is_contiguous_custom(MemoryFormat) const { return nested_tensor_impl_is_contiguous(this); } IntArrayRef NestedTensorImpl::sizes_custom() const { diff --git a/aten/src/ATen/NestedTensorImpl.h b/aten/src/ATen/NestedTensorImpl.h index f40684ce0ba2..697969edbbd4 100644 --- a/aten/src/ATen/NestedTensorImpl.h +++ b/aten/src/ATen/NestedTensorImpl.h @@ -115,7 +115,7 @@ struct TORCH_API NestedTensorImpl : public c10::TensorImpl { // with real implementations int64_t numel_custom() const override; c10::SymInt sym_numel_custom() const override; - c10::SymBool sym_is_contiguous_custom(MemoryFormat) const override; + bool is_contiguous_custom(MemoryFormat) const override; int64_t size_custom(int64_t d) const override { return this->size(d); } diff --git a/aten/src/ATen/SparseCsrTensorImpl.cpp b/aten/src/ATen/SparseCsrTensorImpl.cpp index f73d75ab53ad..0ec3c97a2dac 100644 --- a/aten/src/ATen/SparseCsrTensorImpl.cpp +++ b/aten/src/ATen/SparseCsrTensorImpl.cpp @@ -252,7 +252,8 @@ void SparseCsrTensorImpl::set_stride(int64_t dim, int64_t new_stride) { void SparseCsrTensorImpl::set_storage_offset(int64_t storage_offset) { TORCH_CHECK(false, "Sparse ", at::sparse_csr::layoutToString(layout_, /*upper=*/true), " tensors do not have set_storage_offset."); } -c10::SymBool SparseCsrTensorImpl::sym_is_contiguous_custom(MemoryFormat) const { +bool SparseCsrTensorImpl::is_contiguous_custom(MemoryFormat) const { TORCH_CHECK(false, "Sparse ", at::sparse_csr::layoutToString(layout_, /*upper=*/true), " tensors do not have is_contiguous"); } + } // namespace at diff --git a/aten/src/ATen/SparseCsrTensorImpl.h b/aten/src/ATen/SparseCsrTensorImpl.h index 14688163a374..94ac1e1c3934 100644 --- a/aten/src/ATen/SparseCsrTensorImpl.h +++ b/aten/src/ATen/SparseCsrTensorImpl.h @@ -86,7 +86,7 @@ struct TORCH_API SparseCsrTensorImpl : public TensorImpl { protected: IntArrayRef strides_custom() const override; SymIntArrayRef sym_strides_custom() const 
override; - SymBool sym_is_contiguous_custom(MemoryFormat) const override; + bool is_contiguous_custom(MemoryFormat) const override; public: void set_size(int64_t dim, int64_t new_size) override; diff --git a/aten/src/ATen/core/TensorBase.h b/aten/src/ATen/core/TensorBase.h index c5c7dfab368b..3dc2ec66f816 100644 --- a/aten/src/ATen/core/TensorBase.h +++ b/aten/src/ATen/core/TensorBase.h @@ -124,7 +124,7 @@ class TORCH_API TensorBase { } TensorBase contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const { - if (is_contiguous_or_false(memory_format)) { + if (is_contiguous(memory_format)) { return *this; } else { return __dispatch_contiguous(memory_format); @@ -265,25 +265,6 @@ class TORCH_API TensorBase { return impl_->is_contiguous(memory_format); } - // Like is_contiguous, but more dynamic shape-friendly. Maybe returns a symbolic representation of - // contiguity instead of SymTrue SymFalse, when results are data-dependent. - c10::SymBool sym_is_contiguous(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const { - if (impl_->has_symbolic_sizes_strides()) { - return impl_->sym_is_contiguous(memory_format); - } - return impl_->is_contiguous(memory_format); - } - - // Like is_contiguous, but more dynamic shape-friendly. Can returns - // false instead of throwing data-dependent errors for tensors with unbacked - // sizes or strides. - bool is_contiguous_or_false(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const { - if (impl_->has_symbolic_sizes_strides()) { - return impl_->sym_is_contiguous(memory_format).guard_or_false(__FILE__, __LINE__); - } - return impl_->is_contiguous(memory_format); - } - bool is_non_overlapping_and_dense() const { return impl_->is_non_overlapping_and_dense(); } diff --git a/aten/src/ATen/functorch/BatchedTensorImpl.cpp b/aten/src/ATen/functorch/BatchedTensorImpl.cpp index ee222b4e61a5..c3c851445659 100644 --- a/aten/src/ATen/functorch/BatchedTensorImpl.cpp +++ b/aten/src/ATen/functorch/BatchedTensorImpl.cpp @@ -126,7 +126,7 @@ SymIntArrayRef BatchedTensorImpl::sym_strides_custom() const { // TODO: implement proper contiguity on batched tensor, then put // sizes_strides_policy back to Default -c10::SymBool BatchedTensorImpl::sym_is_contiguous_custom(at::MemoryFormat memory_format) const { +bool BatchedTensorImpl::is_contiguous_custom(at::MemoryFormat memory_format) const { TORCH_CHECK(memory_format == MemoryFormat::Contiguous, "NYI: querying is_contiguous inside of vmap for memory_format ", "other than torch.contiguous_format"); diff --git a/aten/src/ATen/functorch/BatchedTensorImpl.h b/aten/src/ATen/functorch/BatchedTensorImpl.h index 3eccc94d3ea6..ce3d29008411 100644 --- a/aten/src/ATen/functorch/BatchedTensorImpl.h +++ b/aten/src/ATen/functorch/BatchedTensorImpl.h @@ -69,7 +69,7 @@ struct TORCH_API BatchedTensorImpl : public c10::TensorImpl { IntArrayRef strides_custom() const override; SymIntArrayRef sym_strides_custom() const override; // Override a bunch of methods inherited from TensorImpl to return error messages. 
- c10::SymBool sym_is_contiguous_custom(at::MemoryFormat memory_format) const override; + bool is_contiguous_custom(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const override; void set_size(int64_t dim, int64_t new_size) override; void set_stride(int64_t dim, int64_t new_stride) override; c10::intrusive_ptr shallow_copy_and_detach( diff --git a/aten/src/ATen/native/Linear.cpp b/aten/src/ATen/native/Linear.cpp index 5d3a84ea39f6..121d38b55d50 100644 --- a/aten/src/ATen/native/Linear.cpp +++ b/aten/src/ATen/native/Linear.cpp @@ -93,7 +93,7 @@ Tensor linear(const Tensor& input, const Tensor& weight, const std::optionaldefined() && !input.is_xla()) { // Also hit the fused path for contiguous 3D input, if not using xla // backend. Reshaping/flattening has some performance implications on xla. - bool is_contiguous = input.is_contiguous_or_false(); + bool is_contiguous = definitely_contiguous(input.sym_sizes(), input.sym_strides(), input.sym_numel()); if (is_contiguous && input_dim == 3) { return _flatten_nd_linear(input, weight, *bias); } else if (is_contiguous && input.layout() == c10::kStrided && weight.layout() == c10::kStrided && bias->dim() == 1) { diff --git a/aten/src/ATen/native/TensorProperties.cpp b/aten/src/ATen/native/TensorProperties.cpp index 77acfe47363e..5a4d55e0e3cb 100644 --- a/aten/src/ATen/native/TensorProperties.cpp +++ b/aten/src/ATen/native/TensorProperties.cpp @@ -113,7 +113,7 @@ Tensor& detach_(Tensor& self) { } Tensor contiguous(const Tensor& self, MemoryFormat memory_format) { - if (self.is_contiguous_or_false(memory_format)) { + if (self.is_contiguous(memory_format)) { return self; } TORCH_CHECK( diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index 958c80f2c7f0..921c7d9c744e 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -1998,18 +1998,19 @@ Tensor reshape_symint(const Tensor& self, c10::SymIntArrayRef proposed_shape) { TORCH_CHECK(false, "reshape is not implemented for sparse tensors"); } - if (self.is_contiguous_or_false() && !self.is_mkldnn()) { + auto sym_sizes = self.sym_sizes(); + auto sym_strides = self.sym_strides(); + auto sym_numel = self.sym_numel(); + if (definitely_contiguous(sym_sizes, sym_strides, sym_numel) && + !self.is_mkldnn()) { return self.view_symint(proposed_shape); } - auto sym_numel = self.sym_numel(); c10::SymDimVector shape = infer_size_dv(proposed_shape, sym_numel); if (self.is_mkldnn()) { return at::_mkldnn_reshape(self, C10_AS_INTARRAYREF_SLOW(shape)); } - auto sym_sizes = self.sym_sizes(); - auto sym_strides = self.sym_strides(); // `computeStride` returns the proper strides to use if this // `reshape` can be just a view. 
diff --git a/aten/src/ATen/native/metal/MetalTensorImpl.h b/aten/src/ATen/native/metal/MetalTensorImpl.h index 44152dd3c6d0..2fb87b2f4f89 100644 --- a/aten/src/ATen/native/metal/MetalTensorImpl.h +++ b/aten/src/ATen/native/metal/MetalTensorImpl.h @@ -35,7 +35,7 @@ struct TORCH_API MetalTensorImpl : public OpaqueTensorImpl { return c10::fromIntArrayRefKnownNonNegative(strides_); } - c10::SymBool sym_is_contiguous_custom(c10::MemoryFormat memory_format) const override { + bool is_contiguous_custom(c10::MemoryFormat memory_format) const override { return true; } diff --git a/aten/src/ATen/native/transformers/attention.cpp b/aten/src/ATen/native/transformers/attention.cpp index 8647a199ad8e..332d4a2ebfe9 100644 --- a/aten/src/ATen/native/transformers/attention.cpp +++ b/aten/src/ATen/native/transformers/attention.cpp @@ -776,7 +776,7 @@ Tensor scaled_dot_product_attention( #ifdef USE_MPS const auto any_nested = query_.is_nested() || key.is_nested() || value.is_nested(); const bool any_inputs_require_grad = query_.requires_grad() || key.requires_grad() || value.requires_grad(); - const auto all_contiguous = query_.is_contiguous_or_false() && key.is_contiguous_or_false() && value.is_contiguous_or_false(); + const auto all_contiguous = query_.is_contiguous() && key.is_contiguous() && value.is_contiguous(); if (query_device_type == DeviceType::MPS && dropout_p == 0.0 && !(GradMode::is_enabled() && any_inputs_require_grad) && (all_contiguous || mps::is_macos_13_or_newer(mps::MacOSVersion::MACOS_VER_15_0_PLUS)) diff --git a/aten/src/ATen/native/vulkan/VulkanOpaqueTensorImpl.h b/aten/src/ATen/native/vulkan/VulkanOpaqueTensorImpl.h index 532caa62687a..04823c592ccb 100644 --- a/aten/src/ATen/native/vulkan/VulkanOpaqueTensorImpl.h +++ b/aten/src/ATen/native/vulkan/VulkanOpaqueTensorImpl.h @@ -33,8 +33,7 @@ struct VulkanOpaqueTensorImpl : public OpaqueTensorImpl { return c10::fromIntArrayRefKnownNonNegative(strides_); } - c10::SymBool sym_is_contiguous_custom( - c10::MemoryFormat memory_format) const override { + bool is_contiguous_custom(c10::MemoryFormat memory_format) const override { (void)memory_format; return true; } diff --git a/c10/core/Contiguity.h b/c10/core/Contiguity.h index 279a795583b1..276d2ce07b57 100644 --- a/c10/core/Contiguity.h +++ b/c10/core/Contiguity.h @@ -12,7 +12,7 @@ namespace c10 { template bool _compute_contiguous(ArrayRef sizes, ArrayRef strides, T numel) { - if (numel == 0) { + if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_eq(numel, 0))) { return true; } @@ -20,11 +20,11 @@ bool _compute_contiguous(ArrayRef sizes, ArrayRef strides, T numel) { // NB: make sure we do signed arithmetic for (int64_t d = int64_t(sizes.size()) - 1; d >= 0; d--) { const auto& size_d = sizes[d]; - if (size_d == 1) { + if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_eq(size_d, 1))) { continue; } - if (strides[d] != expected_stride) { + if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_ne(strides[d], expected_stride))) { return false; } expected_stride *= size_d; @@ -32,66 +32,29 @@ bool _compute_contiguous(ArrayRef sizes, ArrayRef strides, T numel) { return true; } -// Return a SymBool with underlying symbolic expression that represents -// contiguity. Guaranteed not to add guards. -inline static c10::SymBool _compute_contiguous_sym( - ArrayRef sizes, - ArrayRef strides, - const c10::SymInt& numel) { - // If this return true, the tensor is contiguous indeed. Otherwise it could be - // either. 
- auto is_contiguous_or_false = [&]() { - if (TORCH_GUARD_OR_FALSE(sym_eq(numel, 0))) { - return true; - } - - // When calculating the expected stride, we can choose to multiply - // with max(1, size[d]) or size[d]. Regardless, this is ok for this - // function. Why? - // (1) If size[d] == 0, then the tensor is contiguous and if - // we return true or false it won't break this function. - // (2) If size[d] is not 0, then max(1,size[d]) and size[d] are equal. - // Therefore, if we choose to use max(1, size[d]) or size[d] to - // calculate the expected stride, the result is the same. - // - // We symbolically check both paths to maximize the cases where this - // function returns true. This is because make_contiguous_strides_for adds - // the max symbolically, and in some other situations the max might not be - // there. And we want to ensure we return true in both cases. - c10::SymInt expected_stride = 1; - c10::SymInt expected_stride_max = 1; - // NB: make sure we do signed arithmetic - for (int64_t d = int64_t(sizes.size()) - 1; d >= 0; d--) { - if (TORCH_GUARD_OR_FALSE(sym_eq(sizes[d], 1))) { - continue; - } - - if (TORCH_GUARD_OR_TRUE(sym_ne(strides[d], expected_stride)) && - TORCH_GUARD_OR_TRUE(sym_ne(strides[d], expected_stride_max))) { - return false; - } - expected_stride_max *= sizes[d].max(1); - expected_stride *= sizes[d]; - } +// This function will return True if the tensor is contiguous, and False if the +// its not or if we can't determine if it is contiguous due to unbacked symbols +// (it could be either in that case based on the actual runtime data). +template +bool definitely_contiguous(ArrayRef sizes, ArrayRef strides, T numel) { + if (TORCH_GUARD_OR_FALSE(sym_eq(numel, 0))) { return true; - }; - - if (is_contiguous_or_false()) { - return c10::SymBool(true); } - // Build a single expression that represents contiguity and return it. - c10::SymBool is_empty = sym_eq(numel, 0); - c10::SymBool is_contiguous_cond = true; - - c10::SymInt expected_stride = 1; + T expected_stride = 1; + // NB: make sure we do signed arithmetic for (int64_t d = int64_t(sizes.size()) - 1; d >= 0; d--) { const auto& size_d = sizes[d]; - is_contiguous_cond = is_contiguous_cond.sym_and( - size_d.sym_eq(1).sym_or(sym_eq(strides[d], expected_stride))); - expected_stride = expected_stride * size_d; + if (TORCH_GUARD_OR_FALSE(sym_eq(size_d, 1))) { + continue; + } + + if (TORCH_GUARD_OR_TRUE(sym_ne(strides[d], expected_stride))) { + return false; + } + expected_stride *= size_d; } - return is_contiguous_cond.sym_or(is_empty); + return true; } template diff --git a/c10/core/SymbolicShapeMeta.cpp b/c10/core/SymbolicShapeMeta.cpp index 6fa2ab0ed4f1..3becf927cd52 100644 --- a/c10/core/SymbolicShapeMeta.cpp +++ b/c10/core/SymbolicShapeMeta.cpp @@ -79,51 +79,18 @@ SymBool SymbolicShapeMeta::compute_contiguous() const { } c10::SymIntArrayRef sizes(sizes_); c10::SymIntArrayRef strides(strides_); - - auto result = _compute_contiguous_sym(sizes, strides, numel()); - - // If the result is already determined without guarding, just return it. 
- auto maybe_as_bool = result.maybe_as_bool(); - if (maybe_as_bool.has_value()) { - return maybe_as_bool.value(); - } - - auto all_hinted = true; - for (const auto& s : sizes) { - if (!s.has_hint()) { - all_hinted = false; - break; - } - } - - if (all_hinted) { - for (const auto& s : strides) { - if (!s.has_hint()) { - all_hinted = false; - break; - } - } - } - - if (all_hinted) { - // We avoid going through the slow path if everything is hinted, - // because evaluating a large SymPy expression can be expensive. - // TODO exclude backed_size_oblivious from this path. - return _compute_contiguous(sizes_, strides_, numel()); - } - - return result; + return _compute_contiguous(sizes, strides, numel()); } // The rest of them -#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, fallback) \ - SymBool SymbolicShapeMeta::name() const { \ - if (!strides_valid_) { \ - return false; \ - } \ - c10::SymIntArrayRef sizes(sizes_); \ - c10::SymIntArrayRef strides(strides_); \ - return fallback(sizes, strides); \ +#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, nodeimpl, fallback) \ + SymBool SymbolicShapeMeta::name() const { \ + if (!strides_valid_) { \ + return false; \ + } \ + c10::SymIntArrayRef sizes(sizes_); \ + c10::SymIntArrayRef strides(strides_); \ + return fallback(sizes, strides); \ } #define DEFINE_SYMBOOL_COMPUTE(name, nodeimpl, fallback) \ @@ -143,13 +110,11 @@ SymBool SymbolicShapeMeta::compute_contiguous() const { } // clang-format off -DEFINE_EAGER_SYMBOOL_COMPUTE(compute_channels_last_contiguous_2d, _compute_channels_last_contiguous_2d) -DEFINE_EAGER_SYMBOOL_COMPUTE(compute_channels_last_contiguous_3d, _compute_channels_last_contiguous_3d) -DEFINE_EAGER_SYMBOOL_COMPUTE(compute_strides_like_channels_last_2d, is_channels_last_strides_2d) -DEFINE_EAGER_SYMBOOL_COMPUTE(compute_strides_like_channels_last_3d, is_channels_last_strides_3d) - +DEFINE_EAGER_SYMBOOL_COMPUTE(compute_channels_last_contiguous_2d, is_channels_last_contiguous_2d, _compute_channels_last_contiguous_2d) +DEFINE_EAGER_SYMBOOL_COMPUTE(compute_channels_last_contiguous_3d, is_channels_last_contiguous_3d, _compute_channels_last_contiguous_3d) +DEFINE_EAGER_SYMBOOL_COMPUTE(compute_strides_like_channels_last_2d, is_channels_last_strides_2d, is_channels_last_strides_2d) +DEFINE_EAGER_SYMBOOL_COMPUTE(compute_strides_like_channels_last_3d, is_channels_last_strides_3d, is_channels_last_strides_3d) DEFINE_SYMBOOL_COMPUTE(compute_non_overlapping_and_dense, is_non_overlapping_and_dense, _compute_non_overlapping_and_dense) - // clang-format on #undef DEFINE_SYMBOOL_COMPUTE @@ -227,7 +192,6 @@ void SymbolicShapeMeta::set_numel(SymInt val) const { numel_ = std::move(val); available_.fetch_or(numel_avail); } - void SymbolicShapeMeta::set_is_contiguous(SymBool val) const { std::scoped_lock lock(mutables_); if (has_is_contiguous()) { @@ -236,7 +200,6 @@ void SymbolicShapeMeta::set_is_contiguous(SymBool val) const { is_contiguous_ = std::move(val); available_.fetch_or(is_contiguous_avail); } - void SymbolicShapeMeta::set_is_channels_last_contiguous(SymBool val) const { std::scoped_lock lock(mutables_); if (has_is_channels_last_contiguous()) { @@ -245,7 +208,6 @@ void SymbolicShapeMeta::set_is_channels_last_contiguous(SymBool val) const { is_channels_last_contiguous_ = std::move(val); available_.fetch_or(is_channels_last_contiguous_avail); } - void SymbolicShapeMeta::set_is_channels_last_3d_contiguous(SymBool val) const { std::scoped_lock lock(mutables_); if (has_is_channels_last_3d_contiguous()) { @@ -254,7 +216,6 @@ void 
SymbolicShapeMeta::set_is_channels_last_3d_contiguous(SymBool val) const { is_channels_last_3d_contiguous_ = std::move(val); available_.fetch_or(is_channels_last_3d_contiguous_avail); } - void SymbolicShapeMeta::set_is_channels_last(SymBool val) const { std::scoped_lock lock(mutables_); if (has_is_channels_last()) { @@ -263,7 +224,6 @@ void SymbolicShapeMeta::set_is_channels_last(SymBool val) const { is_channels_last_ = std::move(val); available_.fetch_or(is_channels_last_avail); } - void SymbolicShapeMeta::set_is_channels_last_3d(SymBool val) const { std::scoped_lock lock(mutables_); if (has_is_channels_last_3d()) { diff --git a/c10/core/SymbolicShapeMeta.h b/c10/core/SymbolicShapeMeta.h index 0820038968a8..ce0769a8074f 100644 --- a/c10/core/SymbolicShapeMeta.h +++ b/c10/core/SymbolicShapeMeta.h @@ -1,5 +1,4 @@ #pragma once -#include #include #include #include @@ -83,15 +82,6 @@ class C10_API SymbolicShapeMeta { return numel_; } - const SymBool& is_contiguous(at::MemoryFormat memory_format) const { - if (memory_format == at::MemoryFormat::ChannelsLast) { - return this->is_channels_last_contiguous(); - } else if (memory_format == at::MemoryFormat::ChannelsLast3d) { - return this->is_channels_last_3d_contiguous(); - } - return this->is_contiguous(); - } - const SymBool& is_contiguous() const { if (C10_UNLIKELY(!has_is_contiguous())) { init_is_contiguous(); @@ -204,7 +194,6 @@ class C10_API SymbolicShapeMeta { // Lazily initialized variables, with the corresponding available_ flag // indicating whether the value has been initialized mutable std::atomic available_{0}; - enum avail { numel_avail = 1 << 0, is_contiguous_avail = 1 << 1, diff --git a/c10/core/TensorImpl.cpp b/c10/core/TensorImpl.cpp index f3ec2f2d46ea..f1c9eafb1797 100644 --- a/c10/core/TensorImpl.cpp +++ b/c10/core/TensorImpl.cpp @@ -310,14 +310,12 @@ void TensorImpl::throw_data_ptr_access_error() const { false, "Cannot access data pointer of Tensor that doesn't have storage"); } -c10::SymBool TensorImpl::sym_is_contiguous_custom( - at::MemoryFormat memory_format) const { +bool TensorImpl::is_contiguous_custom(at::MemoryFormat memory_format) const { if (C10_UNLIKELY(matches_python_custom(SizesStridesPolicy::CustomStrides))) { return pyobj_slot_.load_pyobj_interpreter()->is_contiguous( this, memory_format); } - - return sym_is_contiguous_default(memory_format); + return is_contiguous_default(memory_format); } bool TensorImpl::is_strides_like_custom(at::MemoryFormat memory_format) const { @@ -328,12 +326,12 @@ bool TensorImpl::is_strides_like_custom(at::MemoryFormat memory_format) const { return is_strides_like_default(memory_format); } -c10::SymBool TensorImpl::sym_is_non_overlapping_and_dense_custom() const { +bool TensorImpl::is_non_overlapping_and_dense_custom() const { if (C10_UNLIKELY(matches_python_custom(SizesStridesPolicy::CustomStrides))) { return pyobj_slot_.load_pyobj_interpreter()->is_non_overlapping_and_dense( this); } - return sym_is_non_overlapping_and_dense_default(); + return is_non_overlapping_and_dense_default(); } IntArrayRef TensorImpl::sizes_custom() const { diff --git a/c10/core/TensorImpl.h b/c10/core/TensorImpl.h index 381bc65b27fb..d903bfa4e68a 100644 --- a/c10/core/TensorImpl.h +++ b/c10/core/TensorImpl.h @@ -812,43 +812,6 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target { } } - c10::SymBool sym_is_contiguous( - at::MemoryFormat memory_format = at::MemoryFormat::Contiguous) const { - if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { - return 
sym_is_contiguous_custom(memory_format); - } - return sym_is_contiguous_default(memory_format); - } - - template - T is_contiguous_default_impl(at::MemoryFormat memory_format) const { - if (!has_symbolic_sizes_strides_) { - if (memory_format == at::MemoryFormat::ChannelsLast) { - return is_channels_last_contiguous_; - } else if (memory_format == at::MemoryFormat::ChannelsLast3d) { - return is_channels_last_3d_contiguous_; - } - return is_contiguous_; - } - - // Handle dynamic shapes. - const auto& symbolic = symbolic_shape_meta().is_contiguous(memory_format); - - if constexpr (std::is_same_v) { - return symbolic.guard_bool(__FILE__, __LINE__); - } else { - return symbolic; - } - } - - bool is_contiguous_default(at::MemoryFormat memory_format) const { - return is_contiguous_default_impl(memory_format); - } - - c10::SymBool sym_is_contiguous_default(at::MemoryFormat memory_format) const { - return is_contiguous_default_impl(memory_format); - } - /** * Whether or not a tensor is laid out in contiguous memory. * @@ -864,6 +827,30 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target { return is_contiguous_default(memory_format); } + // These are factored into separate functions in case subclasses + // want to use them + bool is_contiguous_default(at::MemoryFormat memory_format) const { + if (has_symbolic_sizes_strides_) { + if (memory_format == at::MemoryFormat::ChannelsLast) { + return symbolic_shape_meta().is_channels_last_contiguous().guard_bool( + __FILE__, __LINE__); + } else if (memory_format == at::MemoryFormat::ChannelsLast3d) { + return symbolic_shape_meta() + .is_channels_last_3d_contiguous() + .guard_bool(__FILE__, __LINE__); + } + return symbolic_shape_meta().is_contiguous().guard_bool( + __FILE__, __LINE__); + } + + if (memory_format == at::MemoryFormat::ChannelsLast) { + return is_channels_last_contiguous_; + } else if (memory_format == at::MemoryFormat::ChannelsLast3d) { + return is_channels_last_3d_contiguous_; + } + return is_contiguous_; + } + bool is_strides_like_default(at::MemoryFormat memory_format) const { if (has_symbolic_sizes_strides_) { if (memory_format == at::MemoryFormat::ChannelsLast) { @@ -886,17 +873,9 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target { } } - SymBool sym_is_non_overlapping_and_dense_default() const { - if (has_symbolic_sizes_strides_) { - return symbolic_shape_meta().is_non_overlapping_and_dense(); - } else { - return is_non_overlapping_and_dense_; - } - } - bool is_non_overlapping_and_dense_default() const { if (has_symbolic_sizes_strides_) { - return sym_is_non_overlapping_and_dense_default().guard_bool( + return symbolic_shape_meta().is_non_overlapping_and_dense().guard_bool( __FILE__, __LINE__); } else { return is_non_overlapping_and_dense_; @@ -989,24 +968,9 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target { * for a tensor to have rank, but not well defined sizes. 
*/ // sizes_strides_policy_ >= CustomStrides - + virtual bool is_contiguous_custom(at::MemoryFormat memory_format) const; virtual bool is_strides_like_custom(at::MemoryFormat memory_format) const; - - virtual c10::SymBool sym_is_non_overlapping_and_dense_custom() const; - - bool is_non_overlapping_and_dense_custom() const { - return sym_is_non_overlapping_and_dense_custom().guard_bool( - __FILE__, __LINE__); - } - - virtual c10::SymBool sym_is_contiguous_custom( - at::MemoryFormat memory_format) const; - - bool is_contiguous_custom(at::MemoryFormat memory_format) const { - return sym_is_contiguous_custom(memory_format) - .guard_bool(__FILE__, __LINE__); - } - + virtual bool is_non_overlapping_and_dense_custom() const; // sizes_strides_policy_ >= CustomSizes // Currently this method only exists to be overwritten by subclasses such as // NestedTensorImpl. @@ -1040,9 +1004,9 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target { virtual c10::SymInt sym_storage_offset_custom() const; public: -/** - * True if this tensor has storage. See storage() for details. - */ + /** + * True if this tensor has storage. See storage() for details. + */ #ifdef DEBUG // Allow subclasses to check that their storage_ is never getting set in debug // builds. @@ -1052,11 +1016,11 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target { #endif bool has_storage() const -// NOTE: we devirtualize this because it arguably shouldn't be an -// error just to ask subclasses if they have storage. -// This used to throw for most subclasses, but OpaqueTensorImpl -// wanted it to successfully return false, so we went ahead and made -// it a non-error. + // NOTE: we devirtualize this because it arguably shouldn't be an + // error just to ask subclasses if they have storage. + // This used to throw for most subclasses, but OpaqueTensorImpl + // wanted it to successfully return false, so we went ahead and made + // it a non-error. 
#ifdef C10_DISABLE_TENSORIMPL_EXTENSIBILITY { return storage_; @@ -2483,11 +2447,6 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target { return is_strides_like(at::MemoryFormat::ChannelsLast3d); } - bool is_non_overlapping_and_dense_or_false() const { - return sym_is_non_overlapping_and_dense().guard_or_false( - __FILE__, __LINE__); - } - bool is_non_overlapping_and_dense() const { if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { return is_non_overlapping_and_dense_custom(); @@ -2495,13 +2454,6 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target { return is_non_overlapping_and_dense_default(); } - SymBool sym_is_non_overlapping_and_dense() const { - if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { - return sym_is_non_overlapping_and_dense_custom(); - } - return sym_is_non_overlapping_and_dense_default(); - } - // if this returns true, then it is guaranteed that this tensor has symbolic // sizes/strides bool has_symbolic_sizes_strides() const { diff --git a/c10/core/UndefinedTensorImpl.cpp b/c10/core/UndefinedTensorImpl.cpp index b42d3a92545f..d781ddf9e971 100644 --- a/c10/core/UndefinedTensorImpl.cpp +++ b/c10/core/UndefinedTensorImpl.cpp @@ -12,8 +12,7 @@ UndefinedTensorImpl::UndefinedTensorImpl() set_custom_sizes_strides(SizesStridesPolicy::CustomStrides); } -c10::SymBool UndefinedTensorImpl::sym_is_contiguous_custom( - MemoryFormat format) const { +bool UndefinedTensorImpl::is_contiguous_custom(MemoryFormat format) const { return is_contiguous_default(format); } IntArrayRef UndefinedTensorImpl::strides_custom() const { diff --git a/c10/core/UndefinedTensorImpl.h b/c10/core/UndefinedTensorImpl.h index 6b7573a69388..33ac4e7f868a 100644 --- a/c10/core/UndefinedTensorImpl.h +++ b/c10/core/UndefinedTensorImpl.h @@ -32,7 +32,7 @@ struct C10_API UndefinedTensorImpl final : public TensorImpl { void set_storage_offset(int64_t offset) override; protected: - c10::SymBool sym_is_contiguous_custom(MemoryFormat format) const override; + bool is_contiguous_custom(MemoryFormat format) const override; IntArrayRef strides_custom() const override; SymIntArrayRef sym_strides_custom() const override; diff --git a/test/export/test_export.py b/test/export/test_export.py index 3bb2374e9db2..c50551216975 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -15440,48 +15440,6 @@ class TestExportCustomClass(TorchTestCase): MyModel(), inps, dynamic_shapes=spec, strict=True ).run_decompositions({}) - def test_unbacked_contiguous(self): - class MyModel(torch.nn.Module): - def forward(self, x, mask): - masked_select = x.masked_select(mask) - view = masked_select.view(-1, 1548) - contig = view.contiguous() - return contig + 1 - - example_inputs = ( - torch.randn((768, 1548), dtype=torch.bfloat16), - torch.randint(low=0, high=1, size=(768, 1), dtype=torch.bool), - ) - spec = { - "x": [Dim.STATIC, Dim.STATIC], - "mask": [Dim.STATIC, Dim.STATIC], - } - - traced = export(MyModel(), example_inputs, strict=True) - self.assertExpectedInline( - traced.graph_module.code, - """\ -def forward(self, x, mask): - masked_select = torch.ops.aten.masked_select.default(x, mask); x = mask = None - sym_size_int_1 = torch.ops.aten.sym_size.int(masked_select, 0) - sym_constrain_range_for_size_default = torch.ops.aten.sym_constrain_range_for_size.default(sym_size_int_1); sym_constrain_range_for_size_default = None - ge = sym_size_int_1 >= 0 - _assert_scalar_default = torch.ops.aten._assert_scalar.default(ge, "Runtime assertion failed for expression u0 
>= 0 on node 'ge'"); ge = _assert_scalar_default = None - le = sym_size_int_1 <= 1188864 - _assert_scalar_default_1 = torch.ops.aten._assert_scalar.default(le, "Runtime assertion failed for expression u0 <= 1188864 on node 'le'"); le = _assert_scalar_default_1 = None - mod = sym_size_int_1 % 1548 - eq_2 = mod == 0; mod = None - _assert_scalar_default_2 = torch.ops.aten._assert_scalar.default(eq_2, "Runtime assertion failed for expression Eq(Mod(u0, 1548), 0) on node 'eq_2'"); eq_2 = _assert_scalar_default_2 = None - floordiv = sym_size_int_1 // 1548 - mul_2 = 1548 * floordiv; floordiv = None - eq_3 = sym_size_int_1 == mul_2; sym_size_int_1 = mul_2 = None - _assert_scalar_default_3 = torch.ops.aten._assert_scalar.default(eq_3, "Runtime assertion failed for expression Eq(u0, 1548*((u0//1548))) on node 'eq_3'"); eq_3 = _assert_scalar_default_3 = None - view = torch.ops.aten.view.default(masked_select, [-1, 1548]); masked_select = None - add = torch.ops.aten.add.Tensor(view, 1); view = None - return (add,)""", - ignore_empty_lines=True, - ) - if __name__ == "__main__": run_tests() diff --git a/test/test_dynamic_shapes.py b/test/test_dynamic_shapes.py index f9fc61af81d4..d3a6f4a1a272 100644 --- a/test/test_dynamic_shapes.py +++ b/test/test_dynamic_shapes.py @@ -3336,8 +3336,8 @@ def forward(self, arg0_1: "i64[2][1]cpu", arg1_1: "Sym(u2)", arg2_1: "Sym(u3)", _assert_scalar_4 = torch.ops.aten._assert_scalar.default(eq, "Runtime assertion failed for expression Eq(u2*u3, u0*u1) on node 'eq'"); eq = _assert_scalar_4 = None clone: "f32[u2, u3][Max(1, u3), 1]cpu" = torch.ops.aten.clone.default(arg3_1, memory_format = torch.contiguous_format); arg3_1 = None view: "f32[u0, u1][Max(1, u1), 1]cpu" = torch.ops.aten.view.default(clone, [_local_scalar_dense, _local_scalar_dense_1]); clone = _local_scalar_dense = _local_scalar_dense_1 = None - mul_21: "f32[u0, u1][Max(1, u1), 1]cpu" = torch.ops.aten.mul.Tensor(view, 10); view = None - return (mul_21,)""", # noqa: B950 + mul_19: "f32[u0, u1][Max(1, u1), 1]cpu" = torch.ops.aten.mul.Tensor(view, 10); view = None + return (mul_19,)""", # noqa: B950 ignore_comments=True, ignore_empty_lines=True, ) @@ -3460,75 +3460,6 @@ def forward(self, arg0_1: "i64[2][1]cpu", arg1_1: "Sym(u2)", arg2_1: "Sym(u3)", func(torch.ones(5, 6, 9, 8)) self.assertEqual(cnt.frame_count, 3) - @skipIfTorchDynamo("not allowed to trace mark_unbacked") - @fresh_cache() - def test_unbacked_contiguous(self): - cnt = CompileCounterWithBackend("inductor") - - def func(x): - contig = x.contiguous() - return (contig + 1) * 100 - - compiled_func = torch.compile(fullgraph=True, backend=cnt, dynamic=True)(func) - - x = torch.randn(10, 10) - # make x not contiguous. 
- x = x.t_() - torch._dynamo.decorators.mark_unbacked(x, 0) - torch._dynamo.decorators.mark_unbacked(x, 1) - log_stream, ctx = logs_to_string( - "torch._inductor.compile_fx", "post_grad_graphs" - ) - with ctx(): - compiled_func(x) - self.assertEqual(compiled_func(x), func(x)) - y = torch.rand(20, 20).t() - self.assertEqual(compiled_func(y), func(y)) - self.assertEqual(cnt.frame_count, 1) - output = "\n".join(log_stream.getvalue().strip().split("\n")[4:]).strip() - self.assertExpectedInline( - output, - """\ - ge_1: "Sym(u0 >= 0)" = arg0_1 >= 0; arg0_1 = None - _assert_scalar = torch.ops.aten._assert_scalar.default(ge_1, "Runtime assertion failed for expression u0 >= 0 on node 'ge'"); ge_1 = _assert_scalar = None - ge_3: "Sym(u1 >= 0)" = arg1_1 >= 0; arg1_1 = None - _assert_scalar_1 = torch.ops.aten._assert_scalar.default(ge_3, "Runtime assertion failed for expression u1 >= 0 on node 'ge_1'"); ge_3 = _assert_scalar_1 = None - clone: "f32[u0, u1][Max(1, u1), 1]cpu" = torch.ops.aten.clone.default(arg2_1, memory_format = torch.contiguous_format); arg2_1 = None - add_3: "f32[u0, u1][Max(1, u1), 1]cpu" = torch.ops.aten.add.Tensor(clone, 1); clone = None - mul_6: "f32[u0, u1][Max(1, u1), 1]cpu" = torch.ops.aten.mul.Tensor(add_3, 100); add_3 = None - return (mul_6,)""", # noqa: B950 - ignore_comments=True, - ignore_empty_lines=True, - ) - - log_stream, ctx = logs_to_string( - "torch._inductor.compile_fx", "post_grad_graphs" - ) - with ctx(): - # recompilation will happen due to stride specialization. - y = torch.rand(20, 20) - torch._dynamo.decorators.mark_unbacked(y, 0) - torch._dynamo.decorators.mark_unbacked(y, 1) - self.assertEqual(compiled_func(y), func(y)) - self.assertEqual(cnt.frame_count, 2) - - output = "\n".join(log_stream.getvalue().strip().split("\n")[4:]).strip() - - # No clone this time since input is contiguous. 
- self.assertExpectedInline( - output, - """\ - ge_1: "Sym(u0 >= 0)" = arg0_1 >= 0; arg0_1 = None - _assert_scalar = torch.ops.aten._assert_scalar.default(ge_1, "Runtime assertion failed for expression u0 >= 0 on node 'ge'"); ge_1 = _assert_scalar = None - ge_3: "Sym(u1 >= 0)" = arg1_1 >= 0; arg1_1 = None - _assert_scalar_1 = torch.ops.aten._assert_scalar.default(ge_3, "Runtime assertion failed for expression u1 >= 0 on node 'ge_1'"); ge_3 = _assert_scalar_1 = None - add: "f32[u0, u1][Max(1, u1), 1]cpu" = torch.ops.aten.add.Tensor(arg2_1, 1); arg2_1 = None - mul_5: "f32[u0, u1][Max(1, u1), 1]cpu" = torch.ops.aten.mul.Tensor(add, 100); add = None - return (mul_5,)""", # noqa: B950 - ignore_comments=True, - ignore_empty_lines=True, - ) - instantiate_parametrized_tests(TestUnbacked) diff --git a/test/test_proxy_tensor.py b/test/test_proxy_tensor.py index 6d36b36996c4..4704c9992d5e 100644 --- a/test/test_proxy_tensor.py +++ b/test/test_proxy_tensor.py @@ -1370,8 +1370,8 @@ def forward(self, crop_camera_1, mask_1): view_1 = torch.ops.aten.view.default(expand_1, [sym_size_int, sym_size_int_1, sym_size_int_2]); expand_1 = sym_size_int_1 = sym_size_int_2 = None bmm = torch.ops.aten.bmm.default(view, view_1); view = view_1 = None view_2 = torch.ops.aten.view.default(bmm, [sym_size_int, 3, 3]); bmm = None - mul_9 = sym_size_int * 3 - view_3 = torch.ops.aten.view.default(view_2, [mul_9, 3]); view_2 = mul_9 = None + mul_6 = sym_size_int * 3 + view_3 = torch.ops.aten.view.default(view_2, [mul_6, 3]); view_2 = mul_6 = None mm = torch.ops.aten.mm.default(view_3, eye); view_3 = eye = None _unsafe_view = torch.ops.aten._unsafe_view.default(mm, [sym_size_int, 3, 3]); mm = sym_size_int = None index_put_ = torch.ops.aten.index_put_.default(crop_camera_1, [mask_1], _unsafe_view); crop_camera_1 = mask_1 = _unsafe_view = index_put_ = None diff --git a/tools/autograd/templates/python_variable_methods.cpp b/tools/autograd/templates/python_variable_methods.cpp index bfc5b80835c4..cd7bc0281981 100644 --- a/tools/autograd/templates/python_variable_methods.cpp +++ b/tools/autograd/templates/python_variable_methods.cpp @@ -264,7 +264,7 @@ static PyObject * THPVariable_contiguous(PyObject* self, PyObject* args, PyObjec auto& self_ = THPVariable_Unpack(self); auto memory_format = r.memoryformat(0); // avoids touching the GIL or current device if self is already contiguous - if (self_.is_contiguous_or_false(memory_format)) { + if (self_.is_contiguous(memory_format)) { // NOTE: this logic is duplicated from VariableType.cpp. Since we need to // record this call to contiguous() in the trace regardless of whether // we actually call contiguous here, we need to record this information diff --git a/torch/csrc/lazy/core/tensor_impl.cpp b/torch/csrc/lazy/core/tensor_impl.cpp index ce49338936e3..04730d552952 100644 --- a/torch/csrc/lazy/core/tensor_impl.cpp +++ b/torch/csrc/lazy/core/tensor_impl.cpp @@ -195,14 +195,13 @@ bool LTCTensorImpl::is_strides_like_custom( return false; } -c10::SymBool LTCTensorImpl::sym_is_non_overlapping_and_dense_custom() const { +bool LTCTensorImpl::is_non_overlapping_and_dense_custom() const { // This should be true, but false as a temporary fix for a PyTorch core issue, // according to https://github.com/pytorch/xla/pull/2682. 
return false; } -c10::SymBool LTCTensorImpl::sym_is_contiguous_custom( - c10::MemoryFormat _unused) const { +bool LTCTensorImpl::is_contiguous_custom(c10::MemoryFormat _unused) const { // TODO(ezyang): I don't think this branch is actually necessary // TODO(ezyang): I don't think this logic is right, shouldn't we pass on // the memory format? diff --git a/torch/csrc/lazy/core/tensor_impl.h b/torch/csrc/lazy/core/tensor_impl.h index 02f68c01c6f4..d5e937fc3dc8 100644 --- a/torch/csrc/lazy/core/tensor_impl.h +++ b/torch/csrc/lazy/core/tensor_impl.h @@ -41,11 +41,10 @@ class TORCH_API LTCTensorImpl final : public c10::TensorImpl { int64_t numel_custom() const override; int64_t storage_offset_custom() const override; int64_t dim_custom() const override; + bool is_contiguous_custom(at::MemoryFormat memory_format) const override; bool is_strides_like_custom(at::MemoryFormat memory_format) const override; - c10::SymBool sym_is_non_overlapping_and_dense_custom() const override; + bool is_non_overlapping_and_dense_custom() const override; - c10::SymBool sym_is_contiguous_custom( - at::MemoryFormat memory_format) const override; c10::SymIntArrayRef sym_sizes_custom() const override; c10::SymIntArrayRef sym_strides_custom() const override; c10::SymInt sym_numel_custom() const override; diff --git a/torch/utils/_sympy/printers.py b/torch/utils/_sympy/printers.py index acfcc596bd49..e9c6b8b0e933 100644 --- a/torch/utils/_sympy/printers.py +++ b/torch/utils/_sympy/printers.py @@ -20,9 +20,6 @@ class ExprPrinter(StrPrinter): def _print_Mul(self, expr: sympy.Expr) -> str: return self.stringify(expr.args, "*", precedence(expr)) - def _print_Not(self, expr: sympy.Expr) -> str: - return f"not ({self._print(expr.args[0])})" - def _print_Add(self, expr: sympy.Expr, order: Optional[str] = None) -> str: return self.stringify(expr.args, " + ", precedence(expr))
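Note (not part of the patch itself): the sketch below illustrates the unbacked-contiguity pattern exercised by the test_unbacked_contiguous tests deleted above. The module name MaskedReshape is illustrative; the shapes and ops mirror the removed test/export/test_export.py case. With #155590 applied, the .contiguous() query on a tensor whose size comes from masked_select is answered symbolically; after this revert, the same export may instead guard on a data-dependent expression and fail.

import torch
from torch.export import export


class MaskedReshape(torch.nn.Module):  # illustrative name, mirrors the removed MyModel
    def forward(self, x, mask):
        masked = x.masked_select(mask)  # result size u0 is unbacked (data-dependent)
        view = masked.view(-1, 1548)    # requires u0 to be divisible by 1548
        return view.contiguous() + 1    # contiguity queried on unbacked sizes/strides


example_inputs = (
    torch.randn((768, 1548), dtype=torch.bfloat16),
    torch.randint(low=0, high=1, size=(768, 1), dtype=torch.bool),
)

# With the reverted change in place, this exports cleanly and emits runtime
# asserts such as Eq(Mod(u0, 1548), 0); without it, export may raise a
# data-dependent guard error at the .contiguous() call.
ep = export(MaskedReshape(), example_inputs, strict=True)
print(ep.graph_module.code)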