Fix typos under c10 directory (#98079)
This PR fixes typos in comments and messages of files under the `c10` directory.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/98079
Approved by: https://github.com/Skylion007
Committed by: PyTorch MergeBot
Parent: 762a2079c7
Commit: 64b8d20a5c
@@ -17,7 +17,7 @@ namespace c10 {
 /// DeviceIndex directly.
 using DeviceIndex = int8_t;
 
-/// Represents a a compute device on which a tensor is located. A device is
+/// Represents a compute device on which a tensor is located. A device is
 /// uniquely identified by a type, which specifies the type of machine it is
 /// (e.g. CPU or CUDA GPU), and a device index or ordinal, which identifies the
 /// specific compute device when there is more than one of a certain type. The
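For context on the comment being fixed above, here is a minimal sketch of how a c10::Device is built from a type plus an optional ordinal (illustrative usage, not part of the diff; the header path and string form are assumed from the c10 sources and may differ by version):

#include <c10/core/Device.h>

int main() {
  c10::Device cpu(c10::DeviceType::CPU);        // index defaults to -1, i.e. "the current device"
  c10::Device cuda1(c10::DeviceType::CUDA, 1);  // explicit ordinal: the second CUDA device
  c10::Device parsed("cuda:1");                 // string form, mirroring torch.device("cuda:1")
  return (cpu.is_cpu() && cuda1 == parsed) ? 0 : 1;
}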
@@ -146,7 +146,7 @@ struct C10_API Device final {
     return type_ == DeviceType::CPU;
   }
 
-  /// Return true if the device supports arbirtary strides.
+  /// Return true if the device supports arbitrary strides.
   bool supports_as_strided() const noexcept {
     return type_ != DeviceType::IPU && type_ != DeviceType::XLA &&
         type_ != DeviceType::Lazy;
@@ -113,7 +113,7 @@ std::ostream& operator<<(std::ostream& stream, DeviceType type) {
 // It's also totally fine for this to be slow, since it happens exactly once
 // at import time.
 // (2) Atomic is needed during reading:
-// Whenever a user prints a privatuse1 device name, they need to read this
+// Whenever a user prints a privateuse1 device name, they need to read this
 // variable. Although unlikely, we'll data race if someone else is trying to
 // set this variable at the same time that another thread is print the
 // device name. We could re-use the same mutex, but reading the atomic will
@@ -520,7 +520,7 @@ constexpr bool isAliasDispatchKey(DispatchKey k) {
 // [Note: Per-Backend Functionality Dispatch Keys]
 // Check if a DispatchKey is a per-backend functionality key
 // Any functionalities that can be customized per-backend should be added here.
-// These keys correspond to functionalities that can be customized indivually
+// These keys correspond to functionalities that can be customized individually
 // per backend. While they only take up one bit in the `DispatchKeySet` bitset,
 // they map to (# backends) slots in the operator table.
 // Each of these keys also has a separate set of "runtime keys" in the dispatch
@@ -259,7 +259,7 @@ std::array<FunctionalityOffsetAndMask, num_functionality_keys>
 initializeFunctionalityOffsetsAndMasks() {
   std::array<FunctionalityOffsetAndMask, num_functionality_keys>
       offsets_and_masks;
-  // manualy set the first entry, which corresponds to Undefined.
+  // manually set the first entry, which corresponds to Undefined.
   offsets_and_masks[0] = FunctionalityOffsetAndMask(0, 0);
   // loop through every functionality key (aside from Undefined).
   for (const auto functionality_idx : c10::irange(1, num_functionality_keys)) {
@@ -750,7 +750,7 @@ constexpr auto autograd_privateuse3_ks =
 constexpr auto autograd_other_ks = DispatchKeySet(DispatchKey::AutogradOther);
 constexpr auto autograd_nested =
     DispatchKeySet(DispatchKey::AutogradNestedTensor);
-// keyset correpsonding to functorch keys that have their own dedicated
+// keyset corresponding to functorch keys that have their own dedicated
 // TensorImpl subclass.
 constexpr auto functorch_transforms_ks = DispatchKeySet(
     {DispatchKey::FuncTorchBatched,
@@ -88,7 +88,7 @@ struct Event final {
   /**
    * Increments the event's version and enqueues a job with this version
    * in the stream's work queue. When the stream process that job
-   * it nofifies all streams waiting on / blocked by that version of the
+   * it notifies all streams waiting on / blocked by that version of the
    * event to continue and marks that version as recorded.
    * */
   void record(const Stream& stream) {
@@ -63,7 +63,7 @@ static uint64_t readURandomLong() {
  * /dev/urandom or the current time. For CUDA, gets random from
  * std::random_device and adds a transformation on it. For Intel SGX
  * platform use sgx_read_rand as reading from /dev/urandom is
- * prohibited on that platfrom.
+ * prohibited on that platform.
  *
  * FIXME: The behavior in this function is from legacy code
  * (THRandom_seed/THCRandom_seed) and is probably not the right thing to do,
@@ -46,7 +46,7 @@ struct C10_API InferenceMode {
 //
 // 3. Why does setting InferenceMode also set GradMode?
 //
-// This is required since InferenceMode is a faster and more restricive
+// This is required since InferenceMode is a faster and more restrictive
 // version of NoGradGuard. All runtime checks using GradMode::is_enabled()
 // are applicable to InferenceMode as well, e.g.
 // `tensorTypeInCurrentExecutionContext` in interpreter.cpp.
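As a usage note for the comment above (a sketch assuming the public c10 guard headers; not part of the diff), entering InferenceMode also flips GradMode off, which is exactly why GradMode::is_enabled() checks keep working:

#include <c10/core/GradMode.h>
#include <c10/core/InferenceMode.h>
#include <iostream>

int main() {
  {
    c10::InferenceMode guard;  // RAII guard: inference mode for this scope
    std::cout << c10::InferenceMode::is_enabled() << " "
              << c10::GradMode::is_enabled() << "\n";  // prints "1 0": GradMode was set too
  }
  std::cout << c10::GradMode::is_enabled() << "\n";  // restored once the guard is destroyed
  return 0;
}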
@@ -217,7 +217,7 @@ class C10_API SymInt {
     return i > MAX_UNREPRESENTABLE_INT;
   }
 
-  // Return the min represetable integer as a SymInt
+  // Return the min representable integer as a SymInt
   static constexpr int64_t min_representable_int() {
     return MAX_UNREPRESENTABLE_INT + 1;
   }
@@ -234,7 +234,7 @@ class C10_API SymInt {
   // is_symbolic(). FB only: https://fburl.com/strobelight/5l50ncxd
   // (you will need to change the time window).
   //
-  // So, the scheme is to reserve large negative numbers (asssuming
+  // So, the scheme is to reserve large negative numbers (assuming
   // two's complement):
   //
   // - 0b0.... means we are a positive int
@@ -208,7 +208,7 @@ void TensorImpl::HandleResize() {
   // will create the data storage.
   bool reset_tensor = false;
   if (reserved_) {
-    // If tensor is reserved then don't claim its memeory unless nbytes()
+    // If tensor is reserved then don't claim its memory unless nbytes()
     // is smaller than new size
     reset_tensor =
         storage_.nbytes() < (storage_offset_ + numel_) * data_type_.itemsize();
@@ -105,7 +105,7 @@ inline bool pinned_memory_or_default(c10::optional<bool> pinned_memory) {
 /// `torch.device` object (e.g., "cuda:1" can be passed to everywhere a
 /// `torch.device("cuda:1")` is accepted). To support the syntax
 /// `at::empty({10}, {kCUDA, 1})` and `tensor.to(kCUDA)`, we need to make sure
-/// that `TensorOptions` is implicitly constructible with any argments that a
+/// that `TensorOptions` is implicitly constructible with any arguments that a
 /// `Device` can constructed from. So we have,
 ///
 /// /* implicit */ TensorOptions(T&& device) : TensorOptions() {
@@ -120,7 +120,7 @@ inline bool pinned_memory_or_default(c10::optional<bool> pinned_memory) {
 ///
 ///
 /// But this will be problematic. Consider this: `TensorOptions({kCUDA, 1})`.
-/// Compiler will compain about ambiguity between the copy constructor and the
+/// Compiler will complain about ambiguity between the copy constructor and the
 /// `Device` constructor because `{kCUDA, 1}` can be converted to both a
 /// `TensorOption` and a `Device`.
 ///
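To illustrate the conversion the two comment blocks above describe (a sketch that assumes only the c10 TensorOptions/Device headers are needed; not part of the diff), anything a Device can be constructed from can be passed where a TensorOptions is expected:

#include <c10/core/TensorOptions.h>

void takes_options(const c10::TensorOptions& /*opts*/) {}

int main() {
  takes_options(c10::kCUDA);                  // DeviceType converts via the template constructor
  takes_options(c10::Device(c10::kCUDA, 1));  // an explicit Device also converts implicitly
  takes_options("cuda:1");                    // any argument a Device can be built from works too
  return 0;
}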
@@ -249,7 +249,7 @@ class InlineOptionalDeviceGuard {
   explicit InlineOptionalDeviceGuard(Args&&... args)
       : guard_(in_place, std::forward<Args>(args)...) {}
 
-  // TODO: Consider readding Tensor and TensorList constructors here, when
+  // TODO: Consider reading Tensor and TensorList constructors here, when
   // Tensor moves to c10. (These are only valid on OptionalDeviceGuard,
   // because a Tensor may be undefined, in which case we need an uninitialized
   // tensor guard.)
@@ -144,7 +144,7 @@ struct TraceEntry {
   // This event is generated when a free actually completes.
   SEGMENT_ALLOC, // a call to cudaMalloc to get more memory from the OS
   SEGMENT_FREE, // a call to cudaFree to return memory to the OS (e.g. to
-                // defragement or empty_caches)
+                // defragment or empty_caches)
   SNAPSHOT, // a call to snapshot, used to correlate memory snapshots to trace
             // events
   OOM // the allocator threw an OutOfMemoryError (addr_ is the amount of free
@@ -14,7 +14,7 @@
 using ::testing::HasSubstr;
 
 /**
- * Device kernel that takes mulitple integer parameters as arguments and
+ * Device kernel that takes multiple integer parameters as arguments and
  * will always trigger a device side assertion.
  */
 __global__ void cuda_multiple_vars_always_fail_assertion_kernel(
@@ -33,8 +33,8 @@ __global__ void cuda_device_assertions_fail_on_thread_block_kernel(
 /**
  * TEST: Triggering device side assertion on only 1 thread from <<<1024,128>>>
  * grid. kernel used is unique, it take 2 parameters to tell which particular
- * block and thread it should assert, all the other theads of the kernel will be
- * basically no-op.
+ * block and thread it should assert, all the other threads of the kernel will
+ * be basically no-op.
  */
 void cuda_device_assertions_catches_thread_and_block_and_device() {
   const auto stream = c10::cuda::getStreamFromPool();
@@ -29,7 +29,7 @@ __global__ void cuda_always_fail_assertion_kernel(
 /**
  * TEST: Triggering device side assertion from single block and multiple threads
  * <<<1,128>>>. Once the very first thread asserts all the other threads will
- * basically be in bad state and the block id with failed asseriton would be
+ * basically be in bad state and the block id with failed assertion would be
  * [0,0,0].
  */
 void cuda_device_assertions_multiple_writes_from_same_block() {
@@ -81,7 +81,7 @@ C10_API bool CommandLineFlagsHasBeenParsed();
 // export on Windows platform (with dllexport) but not on linux/mac (with
 // default visibility). As a result, to ensure that we are always exporting
 // global variables, we will redefine the GFLAGS_DLL_DEFINE_FLAG macro if we
-// are building C10 as a shared libray.
+// are building C10 as a shared library.
 // This has to be done after the inclusion of gflags, because some early
 // versions of gflags.h (e.g. 2.0 on ubuntu 14.04) directly defines the
 // macros, so we need to do definition after gflags is done.
@@ -111,7 +111,7 @@ namespace gflags = google;
 // (3) Gflags has a design issue that does not properly expose the global flags,
 // if one builds the library with -fvisibility=hidden. The current gflags (as of
 // Aug 2018) only deals with the Windows case using dllexport, and not the Linux
-// counterparts. As a result, we will explciitly use C10_EXPORT to export the
+// counterparts. As a result, we will explicitly use C10_EXPORT to export the
 // flags defined in C10. This is done via a global reference, so the flag
 // itself is not duplicated - under the hood it is the same global gflags flag.
 #define C10_GFLAGS_DEF_WRAPPER(type, real_type, name, default_value, help_str) \
@@ -296,7 +296,7 @@ inline float fp16_ieee_to_fp32_value(uint16_t h) {
    * single-precision floating-point number is represented as: FP32 = (1 +
    * mantissa * 2**(-23)) * 2**(exponent - 127) Therefore, when the biased
    * exponent is 126, a unit change in the mantissa of the input denormalized
-   * half-precision number causes a change of the constructud single-precision
+   * half-precision number causes a change of the constructed single-precision
    * number by 2**(-24), i.e. the same amount.
    *
    * The last step is to adjust the bias of the constructed single-precision
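For the record, the step described in that comment works out as follows: a unit change in the 23-bit FP32 mantissa scales the value by 2**(-23), and with the biased exponent fixed at 126 the whole number carries a factor of 2**(126 - 127) = 2**(-1), so each mantissa unit contributes 2**(-23) * 2**(-1) = 2**(-24), which is the spacing of the denormalized half-precision inputs.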
@@ -4,8 +4,8 @@
 
 /** Helper class for allocating temporary fixed size arrays with SBO.
  *
- * This is intentionally much simpler than SmallVector, to improve performace at
- * the expense of many features:
+ * This is intentionally much simpler than SmallVector, to improve performance
+ * at the expense of many features:
  * - No zero-initialization for numeric types
  * - No resizing after construction
  * - No copy/move
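For readers unfamiliar with the idea, here is an illustrative small-buffer-optimization sketch in the spirit of that comment (a hypothetical TinyBuffer, deliberately not c10's actual SmallBuffer): a fixed-size runtime array that only heap-allocates when the requested size exceeds the inline capacity N.

#include <cstddef>
#include <memory>

template <typename T, std::size_t N>
class TinyBuffer {
 public:
  explicit TinyBuffer(std::size_t size) : size_(size) {
    if (size > N) {
      heap_.reset(new T[size]);  // default-initialized: no zeroing for trivial T
      data_ = heap_.get();
    } else {
      data_ = stack_;  // small sizes use the inline storage, no allocation at all
    }
  }
  TinyBuffer(const TinyBuffer&) = delete;  // no copy/move, matching the feature list above
  TinyBuffer& operator=(const TinyBuffer&) = delete;

  T* data() { return data_; }
  std::size_t size() const { return size_; }
  T& operator[](std::size_t i) { return data_[i]; }

 private:
  T stack_[N];                 // inline storage, intentionally left uninitialized
  std::unique_ptr<T[]> heap_;  // only engaged when size > N
  T* data_ = nullptr;
  std::size_t size_ = 0;
};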
@@ -16,7 +16,7 @@ namespace c10 {
  *
  * This class implements a small subset of the generic functionality
  * implemented by folly:Synchronized<T>. Specifically, only withLock<T>
- * is implemeted here since it's the smallest possible API that is
+ * is implemented here since it's the smallest possible API that is
  * able to cover a large surface area of functionality offered by
  * folly::Synchronized<T>.
  */
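To make the withLock idea concrete, here is a minimal sketch of the pattern (a hypothetical Guarded<T>, not the actual c10::Synchronized implementation): the protected value is only reachable from a callback that runs while the mutex is held.

#include <mutex>
#include <utility>
#include <vector>

template <typename T>
class Guarded {
 public:
  template <typename F>
  auto withLock(F&& fn) {
    std::lock_guard<std::mutex> lock(mutex_);  // lock held for the whole callback
    return std::forward<F>(fn)(value_);
  }

 private:
  std::mutex mutex_;
  T value_{};
};

// Usage: every access to the vector happens under the lock.
int count_after_push() {
  Guarded<std::vector<int>> numbers;
  numbers.withLock([](std::vector<int>& v) { v.push_back(42); });
  return numbers.withLock([](std::vector<int>& v) { return static_cast<int>(v.size()); });
}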
@@ -1,7 +1,7 @@
 #pragma once
 #include <c10/macros/Macros.h>
 
-// Utility to guaruntee complete unrolling of a loop where the bounds are known
+// Utility to guarantee complete unrolling of a loop where the bounds are known
 // at compile time. Various pragmas achieve similar effects, but are not as
 // portable across compilers.
 
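As background, a loop with compile-time bounds can be forced to unroll with template machinery; the sketch below is a generic illustration (a hypothetical unroll<N> helper, not the API defined in the header above):

#include <cstddef>
#include <type_traits>
#include <utility>

namespace detail {
template <typename Func, std::size_t... I>
constexpr void unroll_impl(Func&& body, std::index_sequence<I...>) {
  // The fold expression expands to body(0), body(1), ..., body(N - 1):
  // no runtime loop counter survives into the generated code.
  (body(std::integral_constant<std::size_t, I>{}), ...);
}
}  // namespace detail

template <std::size_t N, typename Func>
constexpr void unroll(Func&& body) {
  detail::unroll_impl(std::forward<Func>(body), std::make_index_sequence<N>{});
}

// Usage: sum four elements with the loop fully expanded by the compiler.
inline int sum4(const int* p) {
  int acc = 0;
  unroll<4>([&](auto i) { acc += p[i]; });
  return acc;
}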
@@ -864,7 +864,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal {
   }
 
   void swap_adjacent_nodes(EntryPointer before, EntryPointer after) {
-    // sentinel stays consant, so before->prev cannot equal after
+    // sentinel stays constant, so before->prev cannot equal after
    auto before_prev = before->prev;
    auto after_next = after->next;
 
@@ -938,7 +938,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal {
     // We maintain the invariant that:
     // - result.current_entry contains the new value we're inserting
     //   and is in the LinkedList position of to_insert
-    // - to_insert contains the value that reprseents the position of
+    // - to_insert contains the value that represents the position of
     //   result.current_entry
     swap(to_insert, current_entry->value);
     iterator result = {current_entry};
@@ -9,7 +9,7 @@ static inline DimVector contiguous_strides(const IntArrayRef sizes) {
   using Int = IntArrayRef::value_type;
   const Int dims = static_cast<Int>(sizes.size());
 
-  // With this intialisation we get the case dim == 0 or 1 right
+  // With this initialisation we get the case dim == 0 or 1 right
   DimVector strides(dims, 1);
 
   for (auto i = dims - 2; i >= 0; --i) {
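For completeness, here is a sketch of the row-major stride computation that loop performs (plain std::vector instead of c10's DimVector; the real function may also clamp zero-sized dimensions). Initializing every stride to 1 is what already makes the 0-d and 1-d cases correct before the loop runs.

#include <cstdint>
#include <vector>

std::vector<int64_t> contiguous_strides_sketch(const std::vector<int64_t>& sizes) {
  const int64_t dims = static_cast<int64_t>(sizes.size());
  std::vector<int64_t> strides(dims, 1);  // dim == 0 or 1 is already right here
  for (int64_t i = dims - 2; i >= 0; --i) {
    strides[i] = strides[i + 1] * sizes[i + 1];  // each outer step spans one inner block
  }
  return strides;
}

// e.g. sizes {2, 3, 4} -> strides {12, 4, 1}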