Fix typos in multiple files (#152254)

Fix typos in multiple files

Pull Request resolved: https://github.com/pytorch/pytorch/pull/152254
Approved by: https://github.com/Skylion007
Authored by co63oc on 2025-04-26 17:18:35 +00:00, committed by PyTorch MergeBot
parent 6a62356857, commit 9480ed4cd3
10 changed files with 15 additions and 15 deletions


@@ -2652,7 +2652,7 @@ Tensor mexp_impl(
// `norm_cpu` is used to decide which Tensors require which approximation
// based on their norm. This decision takes place on CPU.
// It requires moving data back and forth between devices when `a` is on CUDA,
-// but at the cost of only one sigle CPU-CUDA synchronization (instead of 6),
+// but at the cost of only one single CPU-CUDA synchronization (instead of 6),
// and better performance overall (benchmarked).
const auto norm_cpu = (a.device().type() == at::kCUDA)
? norm.to(at::kCPU) : norm;
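
As a side note, the pattern this comment describes can be sketched in a few lines of Python (toy sizes and an illustrative threshold; this is not the actual mexp_impl code): the batched norms are computed on `a`'s device and copied to CPU in one transfer, so every which-approximation decision reads CPU values and only a single CPU-CUDA synchronization is paid.

import torch

# Illustrative only: one device-to-host copy, then all decisions use CPU values.
a = torch.randn(8, 4, 4)                      # may live on CUDA in practice
norm = torch.linalg.matrix_norm(a, ord=1)     # stays on a's device
norm_cpu = norm.cpu() if a.is_cuda else norm  # the single CPU-CUDA synchronization
use_cheap_approx = norm_cpu <= 1.0            # threshold chosen for illustration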


@@ -296,7 +296,7 @@ void slow_conv_transpose3d_out_cpu_template(
int64_t elt;
// For each elt in batch, do:
for (elt = 0; elt < batch_size; ++elt) {
-// Matrix mulitply per output:
+// Matrix multiply per output:
input_n = input.select(0, elt);
output_n = output.select(0, elt);
@@ -520,7 +520,7 @@ void slow_conv_transpose3d_backward_out_cpu_template(
int64_t elt;
// For each elt in batch, do:
for (elt = 0; elt < batch_size; ++elt) {
-// Matrix mulitply per sample:
+// Matrix multiply per sample:
grad_input_n = grad_input.select(0, elt);
grad_output_n = grad_output.select(0, elt);
@@ -736,12 +736,12 @@ void slow_conv_transpose3d_acc_grad_parameters_cpu(
int64_t elt;
// For each elt in batch, do:
for (elt = 0; elt < batch_size; ++elt) {
-// Matrix mulitply per output:
+// Matrix multiply per output:
grad_output_n = grad_output.select(0, elt);
// Do Weight:
if (grad_weight.defined()) {
-// Matrix mulitply per output:
+// Matrix multiply per output:
input_n = input.select(0, elt);
if (need_columns) {


@@ -31,7 +31,7 @@ is present in the working directory). For additional details see [1].
# read in as dataframe, explicitly use zero values
df = pd.DataFrame(rows).fillna(0).astype(int)
-# peform conversion according to Section 2.1 of [1]
+# perform conversion according to Section 2.1 of [1]
df["poly"] = 2 * df["a"] + 2 ** df["s"] + 1
# ensure columns are properly ordered
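
As a quick worked example of the conversion above (two made-up rows, not the real direction-number table): poly = 2*a + 2**s + 1 packs the polynomial coefficients into a single integer, so s=1, a=0 gives 3 and s=2, a=1 gives 7.

import pandas as pd

# Made-up rows for illustration; mirrors the conversion in the script above.
df = pd.DataFrame([{"d": 2, "s": 1, "a": 0}, {"d": 3, "s": 2, "a": 1}]).fillna(0).astype(int)
df["poly"] = 2 * df["a"] + 2 ** df["s"] + 1
print(df["poly"].tolist())  # [3, 7]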


@@ -559,7 +559,7 @@ Tensor masked_softmax_cpu(const Tensor& input_, const Tensor& mask_, const std::
TORCH_CHECK((input_.sizes()[0] == mask.sizes()[0]) && (input_.sizes()[2] == mask.sizes()[1]),
"For mask_type == 1 mask shape should be (B, L)");
if (dim_ != input_.dim() - 1) {
-// We only process padding mask in the optimized way if softmax is applied along the last dimesion,
+// We only process padding mask in the optimized way if softmax is applied along the last dimension,
// otherwise we need to expand the mask into a generic 4D one
mask = mask_.view({input_.sizes()[0], 1, 1, input_.sizes()[2]});
mask = mask.expand(input_.sizes()).contiguous();
@@ -570,7 +570,7 @@ Tensor masked_softmax_cpu(const Tensor& input_, const Tensor& mask_, const std::
TORCH_CHECK((mask.dim() == 2) && (input_.sizes()[2] == mask.sizes()[0]) && (input_.sizes()[2] == mask.sizes()[1]),
"For mask_type == 0 mask shape should be (L, L)");
if (dim_ != input_.dim() - 1) {
-// We only process attention mask in a optimized way if softmax is applied along the last dimesion,
+// We only process attention mask in a optimized way if softmax is applied along the last dimension,
// otherwise we need to expand the mask into a generic 4D one
mask = mask.view({1, 1, input_.sizes()[2], input_.sizes()[2]});
mask = mask.expand(input_.sizes()).contiguous();
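
A short Python sketch of the two expansions these comments describe (shapes assumed for illustration; not the C++ fast path itself): a (B, L) padding mask is viewed as (B, 1, 1, L), an (L, L) attention mask as (1, 1, L, L), and either is then expanded to the 4D input shape.

import torch

# Assumed input shape (B, H, L, L) for illustration.
B, H, L = 2, 4, 5
inp = torch.randn(B, H, L, L)
pad_mask = torch.zeros(B, L, dtype=torch.bool)   # mask_type == 1: padding mask (B, L)
attn_mask = torch.zeros(L, L, dtype=torch.bool)  # mask_type == 0: attention mask (L, L)
pad_4d = pad_mask.view(B, 1, 1, L).expand(inp.shape).contiguous()
attn_4d = attn_mask.view(1, 1, L, L).expand(inp.shape).contiguous()
print(pad_4d.shape, attn_4d.shape)  # both torch.Size([2, 4, 5, 5])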


@@ -1693,7 +1693,7 @@ static Tensor sparse_compressed_to_flipped(
// Step 4:
// Convert the COO indices to the CSC/BSC indices and form the output.
-// We need to sort COO indices along the "tranposed" dim to satisfy the
+// We need to sort COO indices along the "transposed" dim to satisfy the
// invariant of sorted plain indices.
// Hash coo indices by converting 2d indices to linear offsets with
// more "weight" (aka stride) placed on the "transposed" dimension.
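
A hedged sketch of the hashing step this comment describes (toy indices, not the actual kernel): the 2D COO indices are mapped to linear offsets with the larger stride placed on the "transposed" dimension, so an argsort of the offsets orders the entries along that dimension first.

import torch

# Toy example: sort (row, col) pairs primarily by column by giving the column
# the larger weight (stride) when forming linear offsets.
rows = torch.tensor([0, 2, 1, 0])
cols = torch.tensor([1, 0, 1, 0])
n_rows = 3
offsets = cols * n_rows + rows                     # heavier weight on the "transposed" dim
order = offsets.argsort()
print(rows[order].tolist(), cols[order].tolist())  # [0, 2, 0, 1] [0, 0, 1, 1]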


@@ -572,7 +572,7 @@ Tensor sparse_broadcast_to(const Tensor& self, IntArrayRef size) {
// }
// Then define for each sparse dim the number of reps for each nnz index/value
-// due to broadcasting. Repetitions do not take into accout the current value
+// due to broadcasting. Repetitions do not take into account the current value
// of nnz - this will be taken care of later {
auto nnz_repeats = c10::DimVector(res_sparse_dim);
nnz_repeats.back() = res_sparse_dim_broadcast_mask.back();
@@ -3601,7 +3601,7 @@ Tensor& transpose_(Tensor& self, int64_t dim0, int64_t dim1) {
// in-place operations. For other sparse formats, the in-place
// transpose would not be possible without shuffling the specified
// values. So we don't support this as it would defeat the purpose
-// of in-place opreations of being memory-efficient.
+// of in-place operations of being memory-efficient.
if (self.is_sparse()) {
return sparse_transpose_(self, dim0, dim1);
}
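
To illustrate the note above (behavior inferred from the comment, so worth double-checking): a sparse COO tensor only needs its two index rows swapped, so the in-place transpose can proceed, while a compressed layout would have to shuffle its values and is expected to reject it.

import torch

# Sparse COO: swapping index rows is enough, so transpose_ stays in place.
coo = torch.eye(3).to_sparse_coo()
coo.transpose_(0, 1)

# Compressed layouts (e.g. CSR) are expected to reject the in-place variant
# (assumed from the comment above).
csr = torch.eye(3).to_sparse_csr()
try:
    csr.transpose_(0, 1)
except RuntimeError:
    print("in-place transpose rejected for a compressed sparse layout")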


@@ -341,7 +341,7 @@ inline int64_t nearest_idx(
int64_t input_size,
int64_t output_size,
std::optional<double> scales) {
-// This method specificly treats cases: output_size == input_size or
+// This method specifically treats cases: output_size == input_size or
// output_size == 2 * input_size, that we would like to get rid of
// We keep this method for BC and consider as deprecated.
// See nearest_exact_idx as replacement
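
A minimal Python sketch of the rule this comment refers to (an approximation for illustration, not the ATen template): the two cases it names reduce to an identity map and to halving the output index, with a scale-based fallback otherwise.

# Special cases named above, plus the legacy floor(scale * index) fallback.
def nearest_idx_sketch(output_index, input_size, output_size, scale=None):
    if output_size == input_size:
        return output_index
    if output_size == 2 * input_size:
        return output_index // 2
    scale = scale if scale is not None else input_size / output_size
    return min(int(output_index * scale), input_size - 1)

print([nearest_idx_sketch(i, 4, 8) for i in range(8)])  # [0, 0, 1, 1, 2, 2, 3, 3]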


@@ -585,7 +585,7 @@ cpu_adaptive_max_pool3d_channels_last(
using Vec = vec::Vectorized<scalar_t>;
using integer_t = vec::int_same_size_t<scalar_t>;
using iVec = vec::Vectorized<integer_t>;
-// for the convience of vectorization, use integer of the same size of scalar_t,
+// for the convenience of vectorization, use integer of the same size of scalar_t,
// e.g. int32_t for float, int64_t for double
// need to make sure doesn't overflow
TORCH_CHECK(input_height * input_width <= std::numeric_limits<integer_t>::max());
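
The width-matching trick this comment mentions can be illustrated in Python (helper name and dtype mapping assumed; the real code uses vec::int_same_size_t plus the TORCH_CHECK above): indices are stored in an integer type as wide as the scalar type, so the flattened spatial extent must fit into that type.

import numpy as np

# int32 indices pair with float32 lanes, int64 with float64, as in the comment;
# the assert mirrors the overflow guard on input_height * input_width.
def check_index_fits(scalar_dtype, input_height, input_width):
    int_same_size = {np.dtype(np.float32): np.int32,
                     np.dtype(np.float64): np.int64}[np.dtype(scalar_dtype)]
    assert input_height * input_width <= np.iinfo(int_same_size).max

check_index_fits(np.float32, 1 << 15, 1 << 15)  # 2**30 fits into int32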


@@ -386,7 +386,7 @@ void cpu_flash_attention(
int64_t thresh_size = (dtype == at::ScalarType::BFloat16) ? 64 : 16;
need_pack = kvSize >= thresh_size && qSize >= thresh_size;
// When the number of gemm is greater than the number of pack,
-// the pack overhead can be overlaped.
+// the pack overhead can be overlapped.
if (need_pack) {
double pack_size = batchSize * num_head * kvSize * headSize;
double qs_per_thread = (batchSize * num_head * qSlice + num_thread - 1) / num_thread;


@@ -41,7 +41,7 @@
*/
#endif /* VULKAN_DEBUG */
-// Note: Do not try to use C10 convenience macors here, as this header is
+// Note: Do not try to use C10 convenience macros here, as this header is
// included from ExecuTorch that does not want to have dependency on C10
#ifdef __clang__
#pragma clang diagnostic push