[3/N] Fix extra warnings brought by clang-tidy-17 (#137552)

Follows #137459

Pull Request resolved: https://github.com/pytorch/pytorch/pull/137552
Approved by: https://github.com/ezyang
Author: cyy
Date: 2024-10-15 02:33:42 +00:00
Committed by: PyTorch MergeBot
Parent: a6eb020522
Commit: 70206499f1

18 changed files with 77 additions and 80 deletions
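
Note: most of the changes below follow one pattern: clang-tidy-17 flags implicit conversions between integer types of different size or signedness (the *-narrowing-conversions checks), and the fix spells the conversion out with static_cast. A minimal sketch of that recurring pattern, using a hypothetical helper rather than code from this patch:

    #include <cstdint>
    #include <vector>

    // Hypothetical example of the recurring fix: size() returns size_t, and the
    // implicit size_t -> int64_t conversion changes signedness, which clang-tidy
    // reports; an explicit static_cast documents the intent and silences it.
    int64_t count_entries(const std::vector<int>& entries) {
      return static_cast<int64_t>(entries.size());
    }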


@@ -29,7 +29,7 @@ static Tensor permuteBatchDimsToFront(const BatchedTensorImpl* batched) {
     if (is_bdim[ptr]) {
       continue;
     }
-    permutation[idx++] = ptr;
+    permutation[idx++] = static_cast<int64_t>(ptr);
   }
   return physical_tensor.permute(permutation);
 }
@@ -43,7 +43,7 @@ VmapPhysicalView MultiBatchVmapTransform::logicalToPhysical(const Tensor& logica
 }
 int64_t VmapPhysicalView::numBatchDims() const {
-  return levels_.count();
+  return static_cast<int64_t>(levels_.count());
 }
 int64_t VmapPhysicalView::numLogicalDims() const {
@@ -171,7 +171,7 @@ static Tensor moveDimToFrontAndUnsqueeze(Tensor tensor, std::optional<int64_t> d
 VmapPhysicalViewVec BroadcastingVmapTransform::logicalToPhysical(TensorList logical_tensors) {
   auto cur_level = maybeCurrentDynamicLayer().value().layerId();
-  auto bdim_size = -1;
+  int64_t bdim_size = -1;
   // Figure out the batch size first
   for (const auto& logical_tensor : logical_tensors) {


@@ -366,7 +366,8 @@ void DistEngine::execute_graph_task_until_ready_queue_empty(
     // block and can be deallocated (release any references to grad tensors
     // as part of inputs_)
     NodeTask task = cpu_ready_queue->pop();
-    if (!(local_graph_task = task.base_.lock())) {
+    local_graph_task = task.base_.lock();
+    if (!local_graph_task) {
       continue;
     }
     if (task.fn_ && !local_graph_task->has_error_.load()) {
@@ -629,11 +630,11 @@ size_t DistEngine::numBackwardPasses() const {
   return initializedContextIds_.size();
 }
-std::unordered_map<std::string, int> DistEngine::getDebugInfo() const {
-  std::unordered_map<std::string, int> debugInfo;
-  debugInfo[kNumBackwardPasses] = numBackwardPasses();
-  debugInfo[kNumAutogradContexts] =
-      DistAutogradContainer::getInstance().numAutogradContexts();
+std::unordered_map<std::string, int64_t> DistEngine::getDebugInfo() const {
+  std::unordered_map<std::string, int64_t> debugInfo;
+  debugInfo[kNumBackwardPasses] = static_cast<int64_t>(numBackwardPasses());
+  debugInfo[kNumAutogradContexts] = static_cast<int64_t>(
+      DistAutogradContainer::getInstance().numAutogradContexts());
   return debugInfo;
 }


@@ -52,7 +52,7 @@ class TORCH_API DistEngine {
   // Returns key-value pairs consisting of useful debugging information related
   // to distributed autograd.
-  std::unordered_map<std::string, int> getDebugInfo() const;
+  std::unordered_map<std::string, int64_t> getDebugInfo() const;
   DistEngine(const DistEngine&) = delete;
   DistEngine& operator=(const DistEngine&) = delete;


@@ -3,6 +3,7 @@
 namespace torch::distributed::autograd {
 torch::autograd::variable_list SendRpcBackward::apply(
+    // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
     torch::autograd::variable_list&& inputs) {
   TORCH_INTERNAL_ASSERT(
       inputs.empty(), "SendRpcBackward should receive no inputs");


@@ -106,7 +106,7 @@ std::unique_ptr<RpcWithAutograd> RpcWithAutograd::fromMessage(
       static_cast<MessageType>(tupleElements[0].toInt());
   AutogradMetadata autogradMetadata(
       tupleElements[1].toInt(), tupleElements[2].toInt());
-  worker_id_t workerId = tupleElements[3].toInt();
+  worker_id_t workerId = static_cast<worker_id_t>(tupleElements[3].toInt());
   auto c10DeviceMap =
       tupleElements[4].to<c10::Dict<std::string, std::string>>();


@@ -116,7 +116,7 @@ std::unique_ptr<RpcWithProfilingResp> RpcWithProfilingResp::fromMessage(
   rpc::MessageType wrappedMsgType =
       static_cast<rpc::MessageType>(tupleElements[0].toInt());
   rpc::ProfilingId profilingId = rpc::ProfilingId::fromIValue(tupleElements[1]);
-  int profiledEventsSize = tupleElements[2].toInt();
+  auto profiledEventsSize = tupleElements[2].toInt();
   std::vector<torch::autograd::profiler::LegacyEvent> remoteEvents;
   remoteEvents.reserve(profiledEventsSize);
   for (const auto i : c10::irange(


@@ -23,7 +23,7 @@ constexpr auto kInternalModule = "torch.distributed.rpc.internal";
     auto dur = std::chrono::duration_cast<std::chrono::microseconds>( \
         std::chrono::high_resolution_clock::now() - startTime);       \
     RpcAgent::getCurrentRpcAgent()->addGilWaitTime(dur);              \
-  } // NOLINT
+  }
 // PythonTypeResolver that inherits from Script::Resolver to
 // support resolving types together with ScriptTypeParser.


@@ -61,6 +61,7 @@ class TORCH_API ScriptCall : public RpcCommandBase {
   // an annotated torchscript function defined by users.
   std::optional<const c10::QualifiedName> qualifiedName_;
   std::vector<at::IValue> stack_;
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const bool isAsyncExecution_;
 };


@@ -444,8 +444,8 @@ void TensorPipeAgent::startImpl() {
     }
     // Assign priorities in reverse order of occurrence in the vector, so that
     // a transport that comes before another receives a higher priority.
-    priority =
-        opts_.transports->size() - 1 - (iter - opts_.transports->begin());
+    priority = static_cast<std::ptrdiff_t>(opts_.transports->size()) - 1 -
+        (iter - opts_.transports->begin());
   }
   std::unique_ptr<TransportRegistration> reg =
       TensorPipeTransportRegistry()->Create(key);
@@ -474,7 +474,8 @@ void TensorPipeAgent::startImpl() {
     }
     // Assign priorities in reverse order of occurrence in the vector, so
     // that a channel that comes before another receives a higher priority.
-    priority = opts_.channels->size() - 1 - (iter - opts_.channels->begin());
+    priority = static_cast<std::ptrdiff_t>(opts_.channels->size()) - 1 -
+        (iter - opts_.channels->begin());
   }
   std::unique_ptr<ChannelRegistration> reg =
       TensorPipeChannelRegistry()->Create(key);


@@ -140,9 +140,11 @@ std::tuple<tensorpipe::Message, TensorpipeWriteBuffers> tensorpipeSerialize(
   buffers.type = std::make_unique<MessageType>(rpcMessage->type());
   buffers.id = std::make_unique<int64_t>(rpcMessage->id());
   // kTpMessageTypeIdx = 0
+  // NOLINTNEXTLINE(modernize-use-emplace)
   tpMessage.payloads.push_back(
       tensorpipe::Message::Payload{buffers.type.get(), sizeof(MessageType)});
   // kTpMessageIdIdx = 1
+  // NOLINTNEXTLINE(modernize-use-emplace)
   tpMessage.payloads.push_back(
       tensorpipe::Message::Payload{buffers.id.get(), sizeof(int64_t)});
@@ -152,6 +154,7 @@ std::tuple<tensorpipe::Message, TensorpipeWriteBuffers> tensorpipeSerialize(
   // it uses non-const pointers even though it doesn't modify them when writing.
   char* payloadPtr = buffers.payload.data();
   // kTpMessagePayloadIdx = 2
+  // NOLINTNEXTLINE(modernize-use-emplace)
   tpMessage.payloads.push_back(
       tensorpipe::Message::Payload{payloadPtr, buffers.payload.size()});
@@ -175,6 +178,7 @@ std::tuple<tensorpipe::Message, TensorpipeWriteBuffers> tensorpipeSerialize(
   pickler.pushIValue(buffers.tensors);
   pickler.stop();
   // kTpMessagePickleIdx = 3
+  // NOLINTNEXTLINE(modernize-use-emplace)
   tpMessage.payloads.push_back(tensorpipe::Message::Payload{
       buffers.pickle.data(), buffers.pickle.size()});
   const std::vector<torch::Tensor>& tensorDataVec = pickler.tensorData();
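
Note on the NOLINTNEXTLINE(modernize-use-emplace) suppressions above: tensorpipe::Message::Payload is, as far as these call sites are concerned, an aggregate, and before C++20 emplace_back cannot aggregate-initialize an element, so push_back with a braced temporary stays and the check is silenced instead. A standalone sketch of the same situation, with a made-up Payload type standing in for the tensorpipe one:

    #include <cstddef>
    #include <vector>

    struct Payload {
      void* data;
      std::size_t length;
    };  // aggregate: no user-declared constructor

    int main() {
      std::vector<Payload> payloads;
      char buf[16]{};
      // payloads.emplace_back(buf, sizeof(buf)) would not compile under C++17,
      // because emplace_back forwards its arguments to a constructor and an
      // aggregate has none; push_back with a braced temporary works, which is
      // why the diff keeps push_back and adds NOLINT instead of "modernizing".
      payloads.push_back(Payload{buf, sizeof(buf)});
      return static_cast<int>(payloads.size());
    }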


@@ -27,8 +27,11 @@ FaultyTensorPipeAgent::FaultyTensorPipeAgent(
           std::move(reverseDeviceMaps),
           std::move(devices),
           std::move(callback)),
+      // NOLINTNEXTLINE(bugprone-use-after-move)
       numFailSends_(opts.numFailSends),
+      // NOLINTNEXTLINE(bugprone-use-after-move)
       messageTypesToFail_(parseMessagesToFailInput(opts.messagesToFail)),
+      // NOLINTNEXTLINE(bugprone-use-after-move)
       messageTypesToDelay_(parseMessagesToDelay(opts.messagesToDelay)) {}
 std::vector<MessageType> FaultyTensorPipeAgent::parseMessagesToFailInput(
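
Note on the NOLINTNEXTLINE(bugprone-use-after-move) suppressions above: the constructor (in the part of the hunk not shown) appears to move the options object into the TensorPipeAgent base class and then read the Faulty-specific members from it, which clang-tidy reports as a use after move. Assuming that reading is correct, only the base-class sub-object is actually moved from, so the later reads are safe and the warnings are suppressed. A self-contained sketch of that shape, with made-up types:

    #include <string>
    #include <utility>
    #include <vector>

    struct BaseOptions {
      std::vector<std::string> transports;
    };

    struct FaultyOptions : BaseOptions {
      int numFailSends = 0;
    };

    struct BaseAgent {
      explicit BaseAgent(BaseOptions opts) : opts_(std::move(opts)) {}
      BaseOptions opts_;
    };

    struct FaultyAgent : BaseAgent {
      explicit FaultyAgent(FaultyOptions opts)
          : BaseAgent(std::move(opts)),  // moves only the BaseOptions sub-object
            // NOLINTNEXTLINE(bugprone-use-after-move)
            numFailSends_(opts.numFailSends) {}  // derived member is untouched
      int numFailSends_;
    };

    int main() {
      FaultyAgent agent(FaultyOptions{});
      return agent.numFailSends_;
    }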


@@ -68,7 +68,7 @@ PyObject* faulty_agent_init(PyObject* _unused, PyObject* noargs) {
               module, "FaultyTensorPipeAgent", rpc_module.attr("TensorPipeAgent"))
           .def(
               py::init(
-                  [](const c10::intrusive_ptr<::c10d::Store> store,
+                  [](const c10::intrusive_ptr<::c10d::Store>& store,
                      std::string name,
                      worker_id_t rank,
                      int world_size,


@@ -13,7 +13,7 @@ UnpickledPythonCall::UnpickledPythonCall(
   pythonUdf_ = pythonRpcHandler.deserialize(serializedPyObj);
 }
-// NOTLINTNEXTLINE(bugprone-exception-escape)
+// NOLINTNEXTLINE(bugprone-exception-escape)
 UnpickledPythonCall::~UnpickledPythonCall() {
   // explicitly setting PyObject* to nullptr to prevent py::object's dtor to
   // decref on the PyObject again.


@@ -10,17 +10,11 @@
 #include <fstream>
 #include <iostream>
 // TODO: Investigate why this is necessary, but fixes build problems in FRL
-#if __has_include("filesystem")
-#include <filesystem>
-namespace fs = std::filesystem;
-#else
-#include <experimental/filesystem>
-namespace fs = std::experimental::filesystem;
-#endif
 #ifndef _WIN32
 #include <sys/stat.h>
+#else
+#include <filesystem>
+namespace fs = std::filesystem;
 #endif
 // TODO: C++17 has the filesystem header, which may replace these
@@ -42,7 +36,7 @@ bool file_exists(std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc;
+  struct stat rc {};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }
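
Note on `struct stat rc;` becoming `struct stat rc {};` (here and in the two files below): the empty braces value-initialize the struct, so `rc` is zeroed rather than holding indeterminate values until lstat fills it, which is presumably what satisfies clang-tidy's uninitialized-variable diagnostics (e.g. cppcoreguidelines-pro-type-member-init). A POSIX-only sketch of the same fix, using a hypothetical helper:

    #include <sys/stat.h>

    // The {} zeroes every member of rc up front, so no field is ever read
    // uninitialized even if lstat fails and leaves the struct untouched.
    bool path_exists(const char* path) {
      struct stat rc {};
      return lstat(path, &rc) == 0;
    }

    int main() {
      return path_exists("/tmp") ? 0 : 1;
    }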


@@ -5,17 +5,11 @@
 #include <torch/csrc/inductor/aoti_torch/oss_proxy_executor.h>
 #include <torch/csrc/inductor/aoti_torch/tensor_converter.h>
 // TODO: Investigate why this is necessary, but fixes build problems in FRL
-#if __has_include("filesystem")
-#include <filesystem>
-namespace fs = std::filesystem;
-#else
-#include <experimental/filesystem>
-namespace fs = std::experimental::filesystem;
-#endif
 #ifndef _WIN32
 #include <sys/stat.h>
+#else
+#include <filesystem>
+namespace fs = std::filesystem;
 #endif
 namespace {
@@ -23,7 +17,7 @@ bool file_exists(std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc;
+  struct stat rc {};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }


@@ -14,7 +14,7 @@ namespace torch::aot_inductor {
 void OSSProxyExecutor::prefill_stack_with_static_arguments(
     int index,
-    at::TypePtr schema_arg_type,
+    const at::TypePtr& schema_arg_type,
     const nlohmann::json& serialized_arg,
     OSSOpKernel& op_kernel) {
   auto& stack = op_kernel.stack_;
@@ -33,7 +33,7 @@
     }
     case c10::TypeKind::IntType: {
       TORCH_CHECK(serialized_arg_type == "as_int");
-      stack.emplace_back(c10::IValue());
+      stack.emplace_back();
       dynamic_args.emplace_back(index, DynamicArgType::IntType, 1);
       break;
     }
@@ -41,7 +41,7 @@
       TORCH_CHECK(
           serialized_arg_type == "as_int" ||
           serialized_arg_type == "as_sym_int");
-      stack.emplace_back(c10::IValue());
+      stack.emplace_back();
       dynamic_args.emplace_back(index, DynamicArgType::IntType, 1);
       break;
     }
@@ -107,14 +107,14 @@
       TORCH_CHECK(serialized_arg_type == "as_ints");
       dynamic_args.emplace_back(
           index, DynamicArgType::ListIntType, serialized_arg_val.size());
-      stack.emplace_back(c10::IValue());
+      stack.emplace_back();
     } else if (schema_arg_type->isSubtypeOf(at::ListType::ofSymInts())) {
       TORCH_CHECK(
           serialized_arg_type == "as_ints" ||
           serialized_arg_type == "as_sym_ints");
       dynamic_args.emplace_back(
           index, DynamicArgType::ListIntType, serialized_arg_val.size());
-      stack.emplace_back(c10::IValue());
+      stack.emplace_back();
     } else if (schema_arg_type->isSubtypeOf(at::ListType::ofFloats())) {
       TORCH_CHECK(serialized_arg_type == "as_floats");
       std::vector<double> ret;
@@ -133,7 +133,7 @@
       if (serialized_arg_type == "as_ints") {
         dynamic_args.emplace_back(
             index, DynamicArgType::ListIntType, serialized_arg_val.size());
-        stack.emplace_back(c10::IValue());
+        stack.emplace_back();
       } else if (serialized_arg_type == "as_floats") {
         std::vector<double> ret;
         for (const auto& arg : serialized_arg_val) {
@@ -259,7 +259,7 @@ void OSSProxyExecutor::get_output_info_from_serialized(
   auto& serialized_output_val = serialized_output.begin().value();
   auto& schema_return = schema_returns[output_index];
-  at::TypePtr schema_return_type = schema_return.real_type();
+  const at::TypePtr& schema_return_type = schema_return.real_type();
   switch (schema_return_type->kind()) {
     case c10::TypeKind::TensorType: {
@@ -408,7 +408,7 @@ void OSSProxyExecutor::call_function(
             list_item_types.has_value(),
             "Could not find list of item types for optional tensor list input");
-        for (std::string item_type : list_item_types.value()) {
+        for (const std::string& item_type : list_item_types.value()) {
           if (item_type == "as_tensor") {
             at::Tensor* tensor = tensor_handle_to_tensor_pointer(
                 flatten_tensor_args[tensor_id++]);
@@ -422,6 +422,7 @@
       }
       case DynamicArgType::ListIntType: {
        std::vector<int64_t> vals;
+        vals.reserve(length);
        for (int j = 0; j < length; j++) {
          vals.push_back(flatten_int_args[int_id++]);
        }
@@ -468,10 +469,10 @@
         schema_return.type()->kind() == c10::TypeKind::ListType &&
         schema_return.type()->isSubtypeOf(at::ListType::ofTensors())) {
       auto tensors = stack[index++].toTensorList();
-      for (size_t i = 0; i < tensors.size(); ++i) {
+      for (auto&& t : tensors) {
         at::Tensor* tensor =
             tensor_handle_to_tensor_pointer(flatten_tensor_args[tensor_id++]);
-        *tensor = tensors[i];
+        *tensor = t;
       }
     } else {
       TORCH_CHECK(
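
Note on the repeated `stack.emplace_back(c10::IValue())` → `stack.emplace_back()` changes above: constructing an explicit empty IValue only to move it into the vector is redundant; calling emplace_back with no arguments default-constructs the element in place, which is what the clang-tidy modernize/readability checks suggest here. The same idea, with std::string standing in for c10::IValue:

    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> stack;
      stack.emplace_back(std::string());  // builds a temporary, then moves it
      stack.emplace_back();               // default-constructs in place; same result
      return static_cast<int>(stack.size());
    }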


@@ -82,7 +82,7 @@ class OSSProxyExecutor : public ProxyExecutor {
  private:
   void prefill_stack_with_static_arguments(
       int index,
-      at::TypePtr schema_arg_type,
+      const at::TypePtr& schema_arg_type,
       const nlohmann::json& serialized_arg,
       OSSOpKernel& op_kernel);
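
Note on the `at::TypePtr` → `const at::TypePtr&` parameter changes in this header and in oss_proxy_executor.cpp above: TypePtr is a reference-counted smart-pointer type, so passing it by value bumps and drops a reference count on every call even though the callee only reads it; clang-tidy (likely performance-unnecessary-value-param) prefers a const reference. A sketch with std::shared_ptr standing in for the real type:

    #include <cstddef>
    #include <memory>
    #include <string>

    using TypePtr = std::shared_ptr<const std::string>;  // stand-in for at::TypePtr

    // Passing by const reference avoids the atomic ref-count increment/decrement
    // that a by-value TypePtr parameter would perform on every call.
    std::size_t type_name_length(const TypePtr& type) {
      return type ? type->size() : 0;
    }

    int main() {
      auto type = std::make_shared<const std::string>("Tensor");
      return static_cast<int>(type_name_length(type));
    }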


@@ -47,29 +47,26 @@
 #endif
-#if __has_include("filesystem")
-#include <filesystem>
-namespace fs = std::filesystem;
-#else
-#include <experimental/filesystem>
-namespace fs = std::experimental::filesystem;
-#endif
 #ifndef _WIN32
-#include <limits.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include <climits>
+#else
+#include <filesystem>
+namespace fs = std::filesystem;
 #endif
 // HACK for failed builds in ARVR, where it cannot find these symbols within
 // std::experimental::filesystem
 namespace {
 std::string get_current_path() {
-#if __has_include("filesystem") && !defined(__linux__)
+#ifdef _WIN32
   return fs::current_path().string();
 #else
-  char currentPath[PATH_MAX];
+  // NOLINTNEXTLINE(*array*)
+  char currentPath[PATH_MAX]{};
   if (getcwd(currentPath, sizeof(currentPath)) != nullptr) {
     return std::string(currentPath);
   } else {
@@ -79,16 +76,16 @@ std::string get_current_path() {
 }
 bool file_exists(std::string& path) {
-#if __has_include("filesystem") && !defined(__linux__)
+#ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc;
+  struct stat rc {};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }
 bool create_directories(const std::string& path) {
-#if __has_include("filesystem") && !defined(__linux__)
+#ifdef _WIN32
   return fs::create_directories(path);
 #else
   if (mkdir(path.c_str(), 0777) == -1) {
@@ -1055,11 +1052,11 @@ AOTI_TORCH_EXPORT void aoti_torch_save_tensor_handle(
   if (!file_exists(tmp_folder)) {
     std::cout
         << "aoti_torch_save_tensor_handle: Path does not exist, creating it..."
-        << tmp_folder << std::endl;
+        << tmp_folder << '\n';
     if (!create_directories(tmp_folder)) {
       std::cout << "aoti_torch_save_tensor_handle: Error creating directory: "
-                << tmp_folder << std::endl;
+                << tmp_folder << '\n';
       return;
     }
   }
@@ -1068,11 +1065,11 @@
   auto bytes = torch::jit::pickle_save(c10::IValue(*t));
   std::ofstream fout(tensor_filepath_to_save, std::ios::out | std::ios::binary);
-  fout.write(bytes.data(), bytes.size());
+  fout.write(bytes.data(), static_cast<std::streamsize>(bytes.size()));
   fout.close();
   std::cout << "aoti_torch_save_tensor_handle: Saved tensor to "
-            << tensor_filepath_to_save << std::endl;
+            << tensor_filepath_to_save << '\n';
 #endif // !defined(C10_MOBILE)
 }
@@ -1087,7 +1084,7 @@ AOTI_TORCH_EXPORT void aoti_torch_print_tensor_handle(
     std::cout << " " << msg;
   }
   std::cout << " "
-            << "]:" << std::endl;
+            << "]:" << '\n';
   // Print exact tensor values for small size tensors
   const int64_t numel = t->numel();
@@ -1096,8 +1093,8 @@
   }
   // Print summary stats of the tensor
-  std::cout << "Number of elements: " << numel << std::endl;
-  std::cout << "Dtype: " << t->dtype() << std::endl;
+  std::cout << "Number of elements: " << numel << '\n';
+  std::cout << "Dtype: " << t->dtype() << '\n';
   if (numel > 0) {
     // torch/aten `mean()` function only supports float and complex dtypes
     // See:
@@ -1109,24 +1106,24 @@
         at::isComplexType(at::typeMetaToScalarType(t->dtype()));
     at::ScalarType float_dtype =
         is_complex_type ? at::kComplexFloat : at::kFloat;
-    std::cout << "Mean value: " << mean_value(float_dtype) << std::endl;
+    std::cout << "Mean value: " << mean_value(float_dtype) << '\n';
     if (!is_complex_type) {
       // "min_all_cuda" function is not implemented for 'ComplexFloat' type.
       // (similar for max) Skip printing min/max value for complex type tensors
       // here If encountered complex dtypes (rare occasions), suggest to print
       // out the whole value of the tensor.
-      std::cout << "Min value: " << t->min().item<float>() << std::endl;
-      std::cout << "Max value: " << t->max().item<float>() << std::endl;
+      std::cout << "Min value: " << t->min().item<float>() << '\n';
+      std::cout << "Max value: " << t->max().item<float>() << '\n';
     }
   }
-  std::cout << "Device: " << t->device() << std::endl;
-  std::cout << "Size: " << t->sizes() << std::endl;
-  std::cout << "Stride: " << t->strides() << std::endl;
-  std::cout << "Layout: " << t->layout() << std::endl;
-  std::cout << "Is contiguous: " << t->is_contiguous() << std::endl;
-  std::cout << "Requires grad: " << t->requires_grad() << std::endl;
-  std::cout << std::endl;
+  std::cout << "Device: " << t->device() << '\n';
+  std::cout << "Size: " << t->sizes() << '\n';
+  std::cout << "Stride: " << t->strides() << '\n';
+  std::cout << "Layout: " << t->layout() << '\n';
+  std::cout << "Is contiguous: " << t->is_contiguous() << '\n';
+  std::cout << "Requires grad: " << t->requires_grad() << '\n';
+  std::cout << '\n';
 }
 // ProxyExecutor
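
Note on the many `std::endl` → `'\n'` replacements in this last file: std::endl writes a newline and then flushes the stream on every use, while '\n' only writes the newline, so repeated logging gets cheaper and the printed output is unchanged; this matches what clang-tidy-17's performance-avoid-endl check asks for. A trivial illustration:

    #include <iostream>

    int main() {
      std::cout << "value: " << 42 << std::endl;  // newline + flush every time
      std::cout << "value: " << 42 << '\n';       // newline only; the stream is
                                                  // still flushed at program exit
      return 0;
    }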