Fix typos under c10 directory (#98079)
This PR fixes typos in comments and messages of files under the `c10` directory.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/98079
Approved by: https://github.com/Skylion007
Committed by: PyTorch MergeBot
Parent: 762a2079c7
Commit: 64b8d20a5c
@@ -17,7 +17,7 @@ namespace c10 {
 /// DeviceIndex directly.
 using DeviceIndex = int8_t;
 
-/// Represents a a compute device on which a tensor is located. A device is
+/// Represents a compute device on which a tensor is located. A device is
 /// uniquely identified by a type, which specifies the type of machine it is
 /// (e.g. CPU or CUDA GPU), and a device index or ordinal, which identifies the
 /// specific compute device when there is more than one of a certain type. The
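For context on the comment being fixed above, here is a minimal sketch of how a c10::Device is built from a type plus an optional ordinal (illustrative usage, not part of the diff; the header path and string form are assumed from the c10 sources and may differ by version):

#include <c10/core/Device.h>

int main() {
  c10::Device cpu(c10::DeviceType::CPU);        // index defaults to -1, i.e. "the current device"
  c10::Device cuda1(c10::DeviceType::CUDA, 1);  // explicit ordinal: the second CUDA device
  c10::Device parsed("cuda:1");                 // string form, mirroring torch.device("cuda:1")
  return (cpu.is_cpu() && cuda1 == parsed) ? 0 : 1;
}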
@@ -146,7 +146,7 @@ struct C10_API Device final {
     return type_ == DeviceType::CPU;
   }
 
-  /// Return true if the device supports arbirtary strides.
+  /// Return true if the device supports arbitrary strides.
   bool supports_as_strided() const noexcept {
     return type_ != DeviceType::IPU && type_ != DeviceType::XLA &&
         type_ != DeviceType::Lazy;
@@ -113,7 +113,7 @@ std::ostream& operator<<(std::ostream& stream, DeviceType type) {
 // It's also totally fine for this to be slow, since it happens exactly once
 // at import time.
 // (2) Atomic is needed during reading:
-// Whenever a user prints a privatuse1 device name, they need to read this
+// Whenever a user prints a privateuse1 device name, they need to read this
 // variable. Although unlikely, we'll data race if someone else is trying to
 // set this variable at the same time that another thread is print the
 // device name. We could re-use the same mutex, but reading the atomic will
@@ -520,7 +520,7 @@ constexpr bool isAliasDispatchKey(DispatchKey k) {
 // [Note: Per-Backend Functionality Dispatch Keys]
 // Check if a DispatchKey is a per-backend functionality key
 // Any functionalities that can be customized per-backend should be added here.
-// These keys correspond to functionalities that can be customized indivually
+// These keys correspond to functionalities that can be customized individually
 // per backend. While they only take up one bit in the `DispatchKeySet` bitset,
 // they map to (# backends) slots in the operator table.
 // Each of these keys also has a separate set of "runtime keys" in the dispatch
@@ -259,7 +259,7 @@ std::array<FunctionalityOffsetAndMask, num_functionality_keys>
 initializeFunctionalityOffsetsAndMasks() {
   std::array<FunctionalityOffsetAndMask, num_functionality_keys>
       offsets_and_masks;
-  // manualy set the first entry, which corresponds to Undefined.
+  // manually set the first entry, which corresponds to Undefined.
   offsets_and_masks[0] = FunctionalityOffsetAndMask(0, 0);
   // loop through every functionality key (aside from Undefined).
   for (const auto functionality_idx : c10::irange(1, num_functionality_keys)) {
@@ -750,7 +750,7 @@ constexpr auto autograd_privateuse3_ks =
 constexpr auto autograd_other_ks = DispatchKeySet(DispatchKey::AutogradOther);
 constexpr auto autograd_nested =
     DispatchKeySet(DispatchKey::AutogradNestedTensor);
-// keyset correpsonding to functorch keys that have their own dedicated
+// keyset corresponding to functorch keys that have their own dedicated
 // TensorImpl subclass.
 constexpr auto functorch_transforms_ks = DispatchKeySet(
     {DispatchKey::FuncTorchBatched,
@@ -88,7 +88,7 @@ struct Event final {
   /**
    * Increments the event's version and enqueues a job with this version
    * in the stream's work queue. When the stream process that job
-   * it nofifies all streams waiting on / blocked by that version of the
+   * it notifies all streams waiting on / blocked by that version of the
    * event to continue and marks that version as recorded.
    * */
   void record(const Stream& stream) {
@@ -63,7 +63,7 @@ static uint64_t readURandomLong() {
  * /dev/urandom or the current time. For CUDA, gets random from
  * std::random_device and adds a transformation on it. For Intel SGX
  * platform use sgx_read_rand as reading from /dev/urandom is
- * prohibited on that platfrom.
+ * prohibited on that platform.
  *
  * FIXME: The behavior in this function is from legacy code
  * (THRandom_seed/THCRandom_seed) and is probably not the right thing to do,
@@ -46,7 +46,7 @@ struct C10_API InferenceMode {
 //
 // 3. Why does setting InferenceMode also set GradMode?
 //
-// This is required since InferenceMode is a faster and more restricive
+// This is required since InferenceMode is a faster and more restrictive
 // version of NoGradGuard. All runtime checks using GradMode::is_enabled()
 // are applicable to InferenceMode as well, e.g.
 // `tensorTypeInCurrentExecutionContext` in interpreter.cpp.
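As a usage note for the comment above (a sketch assuming the public c10 guard headers; not part of the diff), entering InferenceMode also flips GradMode off, which is exactly why GradMode::is_enabled() checks keep working:

#include <c10/core/GradMode.h>
#include <c10/core/InferenceMode.h>
#include <iostream>

int main() {
  {
    c10::InferenceMode guard;  // RAII guard: inference mode for this scope
    std::cout << c10::InferenceMode::is_enabled() << " "
              << c10::GradMode::is_enabled() << "\n";  // prints "1 0": GradMode was set too
  }
  std::cout << c10::GradMode::is_enabled() << "\n";  // restored once the guard is destroyed
  return 0;
}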
@@ -217,7 +217,7 @@ class C10_API SymInt {
     return i > MAX_UNREPRESENTABLE_INT;
   }
 
-  // Return the min represetable integer as a SymInt
+  // Return the min representable integer as a SymInt
   static constexpr int64_t min_representable_int() {
     return MAX_UNREPRESENTABLE_INT + 1;
   }
@@ -234,7 +234,7 @@ class C10_API SymInt {
   // is_symbolic(). FB only: https://fburl.com/strobelight/5l50ncxd
   // (you will need to change the time window).
   //
-  // So, the scheme is to reserve large negative numbers (asssuming
+  // So, the scheme is to reserve large negative numbers (assuming
   // two's complement):
   //
   // - 0b0.... means we are a positive int
@@ -208,7 +208,7 @@ void TensorImpl::HandleResize() {
   // will create the data storage.
   bool reset_tensor = false;
   if (reserved_) {
-    // If tensor is reserved then don't claim its memeory unless nbytes()
+    // If tensor is reserved then don't claim its memory unless nbytes()
     // is smaller than new size
     reset_tensor =
         storage_.nbytes() < (storage_offset_ + numel_) * data_type_.itemsize();
@@ -105,7 +105,7 @@ inline bool pinned_memory_or_default(c10::optional<bool> pinned_memory) {
 /// `torch.device` object (e.g., "cuda:1" can be passed to everywhere a
 /// `torch.device("cuda:1")` is accepted). To support the syntax
 /// `at::empty({10}, {kCUDA, 1})` and `tensor.to(kCUDA)`, we need to make sure
-/// that `TensorOptions` is implicitly constructible with any argments that a
+/// that `TensorOptions` is implicitly constructible with any arguments that a
 /// `Device` can constructed from. So we have,
 ///
 /// /* implicit */ TensorOptions(T&& device) : TensorOptions() {
@@ -120,7 +120,7 @@ inline bool pinned_memory_or_default(c10::optional<bool> pinned_memory) {
 ///
 ///
 /// But this will be problematic. Consider this: `TensorOptions({kCUDA, 1})`.
-/// Compiler will compain about ambiguity between the copy constructor and the
+/// Compiler will complain about ambiguity between the copy constructor and the
 /// `Device` constructor because `{kCUDA, 1}` can be converted to both a
 /// `TensorOption` and a `Device`.
 ///
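To illustrate the conversion the two comment blocks above describe (a sketch that assumes only the c10 TensorOptions/Device headers are needed; not part of the diff), anything a Device can be constructed from can be passed where a TensorOptions is expected:

#include <c10/core/TensorOptions.h>

void takes_options(const c10::TensorOptions& /*opts*/) {}

int main() {
  takes_options(c10::kCUDA);                  // DeviceType converts via the template constructor
  takes_options(c10::Device(c10::kCUDA, 1));  // an explicit Device also converts implicitly
  takes_options("cuda:1");                    // any argument a Device can be built from works too
  return 0;
}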
@@ -249,7 +249,7 @@ class InlineOptionalDeviceGuard {
   explicit InlineOptionalDeviceGuard(Args&&... args)
       : guard_(in_place, std::forward<Args>(args)...) {}
 
-  // TODO: Consider readding Tensor and TensorList constructors here, when
+  // TODO: Consider reading Tensor and TensorList constructors here, when
   // Tensor moves to c10. (These are only valid on OptionalDeviceGuard,
   // because a Tensor may be undefined, in which case we need an uninitialized
   // tensor guard.)
@@ -144,7 +144,7 @@ struct TraceEntry {
   // This event is generated when a free actually completes.
   SEGMENT_ALLOC, // a call to cudaMalloc to get more memory from the OS
   SEGMENT_FREE, // a call to cudaFree to return memory to the OS (e.g. to
-                // defragement or empty_caches)
+                // defragment or empty_caches)
   SNAPSHOT, // a call to snapshot, used to correlate memory snapshots to trace
             // events
   OOM // the allocator threw an OutOfMemoryError (addr_ is the amount of free
@@ -14,7 +14,7 @@
 using ::testing::HasSubstr;
 
 /**
- * Device kernel that takes mulitple integer parameters as arguments and
+ * Device kernel that takes multiple integer parameters as arguments and
  * will always trigger a device side assertion.
  */
 __global__ void cuda_multiple_vars_always_fail_assertion_kernel(
@@ -33,8 +33,8 @@ __global__ void cuda_device_assertions_fail_on_thread_block_kernel(
 /**
  * TEST: Triggering device side assertion on only 1 thread from <<<1024,128>>>
  * grid. kernel used is unique, it take 2 parameters to tell which particular
- * block and thread it should assert, all the other theads of the kernel will be
- * basically no-op.
+ * block and thread it should assert, all the other threads of the kernel will
+ * be basically no-op.
  */
 void cuda_device_assertions_catches_thread_and_block_and_device() {
   const auto stream = c10::cuda::getStreamFromPool();
@@ -29,7 +29,7 @@ __global__ void cuda_always_fail_assertion_kernel(
 /**
  * TEST: Triggering device side assertion from single block and multiple threads
  * <<<1,128>>>. Once the very first thread asserts all the other threads will
- * basically be in bad state and the block id with failed asseriton would be
+ * basically be in bad state and the block id with failed assertion would be
  * [0,0,0].
  */
 void cuda_device_assertions_multiple_writes_from_same_block() {
@@ -81,7 +81,7 @@ C10_API bool CommandLineFlagsHasBeenParsed();
 // export on Windows platform (with dllexport) but not on linux/mac (with
 // default visibility). As a result, to ensure that we are always exporting
 // global variables, we will redefine the GFLAGS_DLL_DEFINE_FLAG macro if we
-// are building C10 as a shared libray.
+// are building C10 as a shared library.
 // This has to be done after the inclusion of gflags, because some early
 // versions of gflags.h (e.g. 2.0 on ubuntu 14.04) directly defines the
 // macros, so we need to do definition after gflags is done.
@@ -111,7 +111,7 @@ namespace gflags = google;
 // (3) Gflags has a design issue that does not properly expose the global flags,
 // if one builds the library with -fvisibility=hidden. The current gflags (as of
 // Aug 2018) only deals with the Windows case using dllexport, and not the Linux
-// counterparts. As a result, we will explciitly use C10_EXPORT to export the
+// counterparts. As a result, we will explicitly use C10_EXPORT to export the
 // flags defined in C10. This is done via a global reference, so the flag
 // itself is not duplicated - under the hood it is the same global gflags flag.
 #define C10_GFLAGS_DEF_WRAPPER(type, real_type, name, default_value, help_str) \
@@ -296,7 +296,7 @@ inline float fp16_ieee_to_fp32_value(uint16_t h) {
    * single-precision floating-point number is represented as: FP32 = (1 +
    * mantissa * 2**(-23)) * 2**(exponent - 127) Therefore, when the biased
    * exponent is 126, a unit change in the mantissa of the input denormalized
-   * half-precision number causes a change of the constructud single-precision
+   * half-precision number causes a change of the constructed single-precision
    * number by 2**(-24), i.e. the same amount.
    *
    * The last step is to adjust the bias of the constructed single-precision
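For the record, the step described in that comment works out as follows: a unit change in the 23-bit FP32 mantissa scales the value by 2**(-23), and with the biased exponent fixed at 126 the whole number carries a factor of 2**(126 - 127) = 2**(-1), so each mantissa unit contributes 2**(-23) * 2**(-1) = 2**(-24), which is the spacing of the denormalized half-precision inputs.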
@@ -4,8 +4,8 @@
 
 /** Helper class for allocating temporary fixed size arrays with SBO.
  *
- * This is intentionally much simpler than SmallVector, to improve performace at
- * the expense of many features:
+ * This is intentionally much simpler than SmallVector, to improve performance
+ * at the expense of many features:
  * - No zero-initialization for numeric types
  * - No resizing after construction
  * - No copy/move
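For readers unfamiliar with the idea, here is an illustrative small-buffer-optimization sketch in the spirit of that comment (a hypothetical TinyBuffer, deliberately not c10's actual SmallBuffer): a fixed-size runtime array that only heap-allocates when the requested size exceeds the inline capacity N.

#include <cstddef>
#include <memory>

template <typename T, std::size_t N>
class TinyBuffer {
 public:
  explicit TinyBuffer(std::size_t size) : size_(size) {
    if (size > N) {
      heap_.reset(new T[size]);  // default-initialized: no zeroing for trivial T
      data_ = heap_.get();
    } else {
      data_ = stack_;  // small sizes use the inline storage, no allocation at all
    }
  }
  TinyBuffer(const TinyBuffer&) = delete;  // no copy/move, matching the feature list above
  TinyBuffer& operator=(const TinyBuffer&) = delete;

  T* data() { return data_; }
  std::size_t size() const { return size_; }
  T& operator[](std::size_t i) { return data_[i]; }

 private:
  T stack_[N];                 // inline storage, intentionally left uninitialized
  std::unique_ptr<T[]> heap_;  // only engaged when size > N
  T* data_ = nullptr;
  std::size_t size_ = 0;
};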
@@ -16,7 +16,7 @@ namespace c10 {
  *
  * This class implements a small subset of the generic functionality
  * implemented by folly:Synchronized<T>. Specifically, only withLock<T>
- * is implemeted here since it's the smallest possible API that is
+ * is implemented here since it's the smallest possible API that is
  * able to cover a large surface area of functionality offered by
  * folly::Synchronized<T>.
  */
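To make the withLock idea concrete, here is a minimal sketch of the pattern (a hypothetical Guarded<T>, not the actual c10::Synchronized implementation): the protected value is only reachable from a callback that runs while the mutex is held.

#include <mutex>
#include <utility>
#include <vector>

template <typename T>
class Guarded {
 public:
  template <typename F>
  auto withLock(F&& fn) {
    std::lock_guard<std::mutex> lock(mutex_);  // lock held for the whole callback
    return std::forward<F>(fn)(value_);
  }

 private:
  std::mutex mutex_;
  T value_{};
};

// Usage: every access to the vector happens under the lock.
int count_after_push() {
  Guarded<std::vector<int>> numbers;
  numbers.withLock([](std::vector<int>& v) { v.push_back(42); });
  return numbers.withLock([](std::vector<int>& v) { return static_cast<int>(v.size()); });
}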
@@ -1,7 +1,7 @@
 #pragma once
 #include <c10/macros/Macros.h>
 
-// Utility to guaruntee complete unrolling of a loop where the bounds are known
+// Utility to guarantee complete unrolling of a loop where the bounds are known
 // at compile time. Various pragmas achieve similar effects, but are not as
 // portable across compilers.
 
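As background, a loop with compile-time bounds can be forced to unroll with template machinery; the sketch below is a generic illustration (a hypothetical unroll<N> helper, not the API defined in the header above):

#include <cstddef>
#include <type_traits>
#include <utility>

namespace detail {
template <typename Func, std::size_t... I>
constexpr void unroll_impl(Func&& body, std::index_sequence<I...>) {
  // The fold expression expands to body(0), body(1), ..., body(N - 1):
  // no runtime loop counter survives into the generated code.
  (body(std::integral_constant<std::size_t, I>{}), ...);
}
}  // namespace detail

template <std::size_t N, typename Func>
constexpr void unroll(Func&& body) {
  detail::unroll_impl(std::forward<Func>(body), std::make_index_sequence<N>{});
}

// Usage: sum four elements with the loop fully expanded by the compiler.
inline int sum4(const int* p) {
  int acc = 0;
  unroll<4>([&](auto i) { acc += p[i]; });
  return acc;
}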
@@ -864,7 +864,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal {
   }
 
   void swap_adjacent_nodes(EntryPointer before, EntryPointer after) {
-    // sentinel stays consant, so before->prev cannot equal after
+    // sentinel stays constant, so before->prev cannot equal after
    auto before_prev = before->prev;
    auto after_next = after->next;
 
@@ -938,7 +938,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal {
     // We maintain the invariant that:
     // - result.current_entry contains the new value we're inserting
     //   and is in the LinkedList position of to_insert
-    // - to_insert contains the value that reprseents the position of
+    // - to_insert contains the value that represents the position of
     //   result.current_entry
     swap(to_insert, current_entry->value);
     iterator result = {current_entry};
@@ -9,7 +9,7 @@ static inline DimVector contiguous_strides(const IntArrayRef sizes) {
   using Int = IntArrayRef::value_type;
   const Int dims = static_cast<Int>(sizes.size());
 
-  // With this intialisation we get the case dim == 0 or 1 right
+  // With this initialisation we get the case dim == 0 or 1 right
   DimVector strides(dims, 1);
 
   for (auto i = dims - 2; i >= 0; --i) {
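For completeness, here is a sketch of the row-major stride computation that loop performs (plain std::vector instead of c10's DimVector; the real function may also clamp zero-sized dimensions). Initializing every stride to 1 is what already makes the 0-d and 1-d cases correct before the loop runs.

#include <cstdint>
#include <vector>

std::vector<int64_t> contiguous_strides_sketch(const std::vector<int64_t>& sizes) {
  const int64_t dims = static_cast<int64_t>(sizes.size());
  std::vector<int64_t> strides(dims, 1);  // dim == 0 or 1 is already right here
  for (int64_t i = dims - 2; i >= 0; --i) {
    strides[i] = strides[i + 1] * sizes[i + 1];  // each outer step spans one inner block
  }
  return strides;
}

// e.g. sizes {2, 3, 4} -> strides {12, 4, 1}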