Fix typos in multiple files (#152254)

Fix typos in multiple files

Pull Request resolved: https://github.com/pytorch/pytorch/pull/152254
Approved by: https://github.com/Skylion007
Authored by co63oc on 2025-04-26 17:18:35 +00:00, committed by PyTorch MergeBot
parent 6a62356857, commit 9480ed4cd3
10 changed files with 15 additions and 15 deletions


@@ -2652,7 +2652,7 @@ Tensor mexp_impl(
// `norm_cpu` is used to decide which Tensors require which approximation
// based on their norm. This decision takes place on CPU.
// It requires moving data back and forth between devices when `a` is on CUDA,
-// but at the cost of only one sigle CPU-CUDA synchronization (instead of 6),
+// but at the cost of only one single CPU-CUDA synchronization (instead of 6),
// and better performance overall (benchmarked).
const auto norm_cpu = (a.device().type() == at::kCUDA)
? norm.to(at::kCPU) : norm;
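
As a side note, the pattern this comment describes can be sketched in a few lines of Python (toy sizes and an illustrative threshold; this is not the actual mexp_impl code): the batched norms are computed on `a`'s device and copied to CPU in one transfer, so every which-approximation decision reads CPU values and only a single CPU-CUDA synchronization is paid.

import torch

# Illustrative only: one device-to-host copy, then all decisions use CPU values.
a = torch.randn(8, 4, 4)                      # may live on CUDA in practice
norm = torch.linalg.matrix_norm(a, ord=1)     # stays on a's device
norm_cpu = norm.cpu() if a.is_cuda else norm  # the single CPU-CUDA synchronization
use_cheap_approx = norm_cpu <= 1.0            # threshold chosen for illustration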


@@ -296,7 +296,7 @@ void slow_conv_transpose3d_out_cpu_template(
int64_t elt;
// For each elt in batch, do:
for (elt = 0; elt < batch_size; ++elt) {
-// Matrix mulitply per output:
+// Matrix multiply per output:
input_n = input.select(0, elt);
output_n = output.select(0, elt);
@@ -520,7 +520,7 @@ void slow_conv_transpose3d_backward_out_cpu_template(
int64_t elt;
// For each elt in batch, do:
for (elt = 0; elt < batch_size; ++elt) {
-// Matrix mulitply per sample:
+// Matrix multiply per sample:
grad_input_n = grad_input.select(0, elt);
grad_output_n = grad_output.select(0, elt);
@@ -736,12 +736,12 @@ void slow_conv_transpose3d_acc_grad_parameters_cpu(
int64_t elt;
// For each elt in batch, do:
for (elt = 0; elt < batch_size; ++elt) {
-// Matrix mulitply per output:
+// Matrix multiply per output:
grad_output_n = grad_output.select(0, elt);
// Do Weight:
if (grad_weight.defined()) {
-// Matrix mulitply per output:
+// Matrix multiply per output:
input_n = input.select(0, elt);
if (need_columns) {


@@ -31,7 +31,7 @@ is present in the working directory). For additional details see [1].
# read in as dataframe, explicitly use zero values
df = pd.DataFrame(rows).fillna(0).astype(int)
-# peform conversion according to Section 2.1 of [1]
+# perform conversion according to Section 2.1 of [1]
df["poly"] = 2 * df["a"] + 2 ** df["s"] + 1
# ensure columns are properly ordered
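
As a quick worked example of the conversion above (two made-up rows, not the real direction-number table): poly = 2*a + 2**s + 1 packs the polynomial coefficients into a single integer, so s=1, a=0 gives 3 and s=2, a=1 gives 7.

import pandas as pd

# Made-up rows for illustration; mirrors the conversion in the script above.
df = pd.DataFrame([{"d": 2, "s": 1, "a": 0}, {"d": 3, "s": 2, "a": 1}]).fillna(0).astype(int)
df["poly"] = 2 * df["a"] + 2 ** df["s"] + 1
print(df["poly"].tolist())  # [3, 7]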


@@ -559,7 +559,7 @@ Tensor masked_softmax_cpu(const Tensor& input_, const Tensor& mask_, const std::
TORCH_CHECK((input_.sizes()[0] == mask.sizes()[0]) && (input_.sizes()[2] == mask.sizes()[1]),
"For mask_type == 1 mask shape should be (B, L)");
if (dim_ != input_.dim() - 1) {
-// We only process padding mask in the optimized way if softmax is applied along the last dimesion,
+// We only process padding mask in the optimized way if softmax is applied along the last dimension,
// otherwise we need to expand the mask into a generic 4D one
mask = mask_.view({input_.sizes()[0], 1, 1, input_.sizes()[2]});
mask = mask.expand(input_.sizes()).contiguous();
@@ -570,7 +570,7 @@ Tensor masked_softmax_cpu(const Tensor& input_, const Tensor& mask_, const std::
TORCH_CHECK((mask.dim() == 2) && (input_.sizes()[2] == mask.sizes()[0]) && (input_.sizes()[2] == mask.sizes()[1]),
"For mask_type == 0 mask shape should be (L, L)");
if (dim_ != input_.dim() - 1) {
-// We only process attention mask in a optimized way if softmax is applied along the last dimesion,
+// We only process attention mask in a optimized way if softmax is applied along the last dimension,
// otherwise we need to expand the mask into a generic 4D one
mask = mask.view({1, 1, input_.sizes()[2], input_.sizes()[2]});
mask = mask.expand(input_.sizes()).contiguous();
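
A short Python sketch of the two expansions these comments describe (shapes assumed for illustration; not the C++ fast path itself): a (B, L) padding mask is viewed as (B, 1, 1, L), an (L, L) attention mask as (1, 1, L, L), and either is then expanded to the 4D input shape.

import torch

# Assumed input shape (B, H, L, L) for illustration.
B, H, L = 2, 4, 5
inp = torch.randn(B, H, L, L)
pad_mask = torch.zeros(B, L, dtype=torch.bool)   # mask_type == 1: padding mask (B, L)
attn_mask = torch.zeros(L, L, dtype=torch.bool)  # mask_type == 0: attention mask (L, L)
pad_4d = pad_mask.view(B, 1, 1, L).expand(inp.shape).contiguous()
attn_4d = attn_mask.view(1, 1, L, L).expand(inp.shape).contiguous()
print(pad_4d.shape, attn_4d.shape)  # both torch.Size([2, 4, 5, 5])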


@@ -1693,7 +1693,7 @@ static Tensor sparse_compressed_to_flipped(
// Step 4:
// Convert the COO indices to the CSC/BSC indices and form the output.
-// We need to sort COO indices along the "tranposed" dim to satisfy the
+// We need to sort COO indices along the "transposed" dim to satisfy the
// invariant of sorted plain indices.
// Hash coo indices by converting 2d indices to linear offsets with
// more "weight" (aka stride) placed on the "transposed" dimension.
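
A hedged sketch of the hashing step this comment describes (toy indices, not the actual kernel): the 2D COO indices are mapped to linear offsets with the larger stride placed on the "transposed" dimension, so an argsort of the offsets orders the entries along that dimension first.

import torch

# Toy example: sort (row, col) pairs primarily by column by giving the column
# the larger weight (stride) when forming linear offsets.
rows = torch.tensor([0, 2, 1, 0])
cols = torch.tensor([1, 0, 1, 0])
n_rows = 3
offsets = cols * n_rows + rows                     # heavier weight on the "transposed" dim
order = offsets.argsort()
print(rows[order].tolist(), cols[order].tolist())  # [0, 2, 0, 1] [0, 0, 1, 1]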


@@ -572,7 +572,7 @@ Tensor sparse_broadcast_to(const Tensor& self, IntArrayRef size) {
// }
// Then define for each sparse dim the number of reps for each nnz index/value
-// due to broadcasting. Repetitions do not take into accout the current value
+// due to broadcasting. Repetitions do not take into account the current value
// of nnz - this will be taken care of later {
auto nnz_repeats = c10::DimVector(res_sparse_dim);
nnz_repeats.back() = res_sparse_dim_broadcast_mask.back();
@@ -3601,7 +3601,7 @@ Tensor& transpose_(Tensor& self, int64_t dim0, int64_t dim1) {
// in-place operations. For other sparse formats, the in-place
// transpose would not be possible without shuffling the specified
// values. So we don't support this as it would defeat the purpose
-// of in-place opreations of being memory-efficient.
+// of in-place operations of being memory-efficient.
if (self.is_sparse()) {
return sparse_transpose_(self, dim0, dim1);
}
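
To illustrate the note above (behavior inferred from the comment, so worth double-checking): a sparse COO tensor only needs its two index rows swapped, so the in-place transpose can proceed, while a compressed layout would have to shuffle its values and is expected to reject it.

import torch

# Sparse COO: swapping index rows is enough, so transpose_ stays in place.
coo = torch.eye(3).to_sparse_coo()
coo.transpose_(0, 1)

# Compressed layouts (e.g. CSR) are expected to reject the in-place variant
# (assumed from the comment above).
csr = torch.eye(3).to_sparse_csr()
try:
    csr.transpose_(0, 1)
except RuntimeError:
    print("in-place transpose rejected for a compressed sparse layout")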


@@ -341,7 +341,7 @@ inline int64_t nearest_idx(
int64_t input_size,
int64_t output_size,
std::optional<double> scales) {
-// This method specificly treats cases: output_size == input_size or
+// This method specifically treats cases: output_size == input_size or
// output_size == 2 * input_size, that we would like to get rid of
// We keep this method for BC and consider as deprecated.
// See nearest_exact_idx as replacement
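
A minimal Python sketch of the rule this comment refers to (an approximation for illustration, not the ATen template): the two cases it names reduce to an identity map and to halving the output index, with a scale-based fallback otherwise.

# Special cases named above, plus the legacy floor(scale * index) fallback.
def nearest_idx_sketch(output_index, input_size, output_size, scale=None):
    if output_size == input_size:
        return output_index
    if output_size == 2 * input_size:
        return output_index // 2
    scale = scale if scale is not None else input_size / output_size
    return min(int(output_index * scale), input_size - 1)

print([nearest_idx_sketch(i, 4, 8) for i in range(8)])  # [0, 0, 1, 1, 2, 2, 3, 3]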


@@ -585,7 +585,7 @@ cpu_adaptive_max_pool3d_channels_last(
using Vec = vec::Vectorized<scalar_t>;
using integer_t = vec::int_same_size_t<scalar_t>;
using iVec = vec::Vectorized<integer_t>;
-// for the convience of vectorization, use integer of the same size of scalar_t,
+// for the convenience of vectorization, use integer of the same size of scalar_t,
// e.g. int32_t for float, int64_t for double
// need to make sure doesn't overflow
TORCH_CHECK(input_height * input_width <= std::numeric_limits<integer_t>::max());
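
The width-matching trick this comment mentions can be illustrated in Python (helper name and dtype mapping assumed; the real code uses vec::int_same_size_t plus the TORCH_CHECK above): indices are stored in an integer type as wide as the scalar type, so the flattened spatial extent must fit into that type.

import numpy as np

# int32 indices pair with float32 lanes, int64 with float64, as in the comment;
# the assert mirrors the overflow guard on input_height * input_width.
def check_index_fits(scalar_dtype, input_height, input_width):
    int_same_size = {np.dtype(np.float32): np.int32,
                     np.dtype(np.float64): np.int64}[np.dtype(scalar_dtype)]
    assert input_height * input_width <= np.iinfo(int_same_size).max

check_index_fits(np.float32, 1 << 15, 1 << 15)  # 2**30 fits into int32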


@@ -386,7 +386,7 @@ void cpu_flash_attention(
int64_t thresh_size = (dtype == at::ScalarType::BFloat16) ? 64 : 16;
need_pack = kvSize >= thresh_size && qSize >= thresh_size;
// When the number of gemm is greater than the number of pack,
-// the pack overhead can be overlaped.
+// the pack overhead can be overlapped.
if (need_pack) {
double pack_size = batchSize * num_head * kvSize * headSize;
double qs_per_thread = (batchSize * num_head * qSlice + num_thread - 1) / num_thread;


@@ -41,7 +41,7 @@
*/
#endif /* VULKAN_DEBUG */
-// Note: Do not try to use C10 convenience macors here, as this header is
+// Note: Do not try to use C10 convenience macros here, as this header is
// included from ExecuTorch that does not want to have dependency on C10
#ifdef __clang__
#pragma clang diagnostic push