Fix typos, via a Levenshtein-type corrector (#31523)

Summary:
Should be non-semantic.

Uses https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines to find likely typos, with https://github.com/bwignall/typochecker to help automate the checking.

Uses an updated version of the tool used in https://github.com/pytorch/pytorch/pull/30606 .
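For context, here is a minimal sketch of the kind of wordlist-driven, Levenshtein-style check described above. The helper names, file handling, and dictionary entries are illustrative only and are not taken from the actual typochecker code.

```python
# Illustrative sketch, not the actual typochecker implementation.
# It reads a "misspelling->correction" wordlist (the format used by the
# Wikipedia "For machines" page), flags matching tokens in text, and picks
# the closest correction by Levenshtein distance when several are listed.
import re

def levenshtein(a, b):
    # Classic dynamic-programming edit distance between strings a and b.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                 # delete from a
                           cur[j - 1] + 1,              # insert into a
                           prev[j - 1] + (ca != cb)))   # substitute
        prev = cur
    return prev[-1]

def load_wordlist(path):
    # Parse lines like "libaries->libraries" or "adress->address, adders".
    table = {}
    with open(path) as f:
        for line in f:
            if "->" in line:
                typo, fixes = line.strip().split("->", 1)
                table[typo.lower()] = [x.strip() for x in fixes.split(",")]
    return table

def find_typos(text, wordlist):
    # Yield (token, suggested_fix) for every token found in the wordlist.
    for token in re.findall(r"[A-Za-z]+", text):
        fixes = wordlist.get(token.lower())
        if fixes:
            yield token, min(fixes, key=lambda f: levenshtein(token.lower(), f))

if __name__ == "__main__":
    wordlist = {"libaries": ["libraries"], "implementaion": ["implementation"]}
    line = "# against libaries in Python 2.7 under Windows"
    for typo, fix in find_typos(line, wordlist):
        print(f"{typo} -> {fix}")   # prints: libaries -> libraries
```

Each suggested fix is still reviewed by hand before being applied, which is why the change should be non-semantic.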
Pull Request resolved: https://github.com/pytorch/pytorch/pull/31523

Differential Revision: D19216749

Pulled By: mrshenli

fbshipit-source-id: 7fd489cb9a77cd7e4950c1046f925d57524960ea
Brian Wignall
2020-01-17 16:01:29 -08:00
committed by Facebook Github Bot
parent c8ca70e39d
commit f326045b37
252 changed files with 284 additions and 284 deletions

View File

@ -270,7 +270,7 @@ if (MSVC)
endif()
# /bigobj increases number of sections in .obj file, which is needed to link
# against libaries in Python 2.7 under Windows
# against libraries in Python 2.7 under Windows
set(${flag_var} "${${flag_var}} /MP /bigobj")
endforeach(flag_var)

View File

@ -10,7 +10,7 @@
/test/test_c10d.py @pietern @mrshenli @zhaojuanmao
/torch/utils/cpp_extension.py @goldsborough @fmassa @soumith @ezyang
# Not there to stricly require the approval, but to be tagged as a reviewer
# Not there to strictly require the approval, but to be tagged as a reviewer
# on the PRs to push them into a high priority inbox.
/torch/csrc/api/data/ @apaszke
/torch/csrc/autograd/ @apaszke

View File

@ -24,7 +24,7 @@ else()
set(CAFFE2_STATIC_LINK_CUDA_INT 0)
endif()
CONFIGURE_FILE(Config.h.in "${CMAKE_CURRENT_SOURCE_DIR}/Config.h")
# TODO: Don't unconditionally generate CUDAConfig.h.in. Unfortuantely,
# TODO: Don't unconditionally generate CUDAConfig.h.in. Unfortunately,
# this file generates AT_ROCM_ENABLED() which is required by the miopen
# files, which are compiled even if we are doing a vanilla CUDA build.
# Once we properly split CUDA and HIP in ATen, we can remove this code.

View File

@ -8,7 +8,7 @@ namespace c10 {
namespace detail {
// WrapRuntimeKernelFunctor: Wraps any runtime functor into a functor that
// inherits from c10::OperatorKernel, so it can be used as a c10 kernel.
// This can, for example, be used for lamdas, functors or even function pointers.
// This can, for example, be used for lambdas, functors or even function pointers.
// In the case of function pointers, since it is a runtime function pointer,
// there is an overhead for calling it whenever the kernel is invoked.
template<class FuncType, class ReturnType, class ParameterList> class WrapRuntimeKernelFunctor_ {};

View File

@ -184,7 +184,7 @@ struct FunctionSchema {
std::vector<Argument> returns_;
// if true then this schema takes an arbitrary number of additional arguments
// after the argument specified in arguments
// currently this is used primarily to represent 'primtive' operators whose
// currently this is used primarily to represent 'primitive' operators whose
// arguments are not checked by schema
bool is_vararg_;
bool is_varret_;

View File

@ -1366,7 +1366,7 @@ struct getTypePtr_<at::optional<T>> final {
} // namespace detail
template <class T>
inline TypePtr getTypePtr() {
// TODO: static_assert that a templated function exists, and throw a friendy
// TODO: static_assert that a templated function exists, and throw a friendly
// error message if not
return detail::getTypePtr_<T>::call();
}

View File

@ -84,7 +84,7 @@ public:
// a constexpr variable if we never odr-use it. But it seems that some
// versions GCC/Clang have buggy determinations on whether or not an
// identifier is odr-used or not, and in any case it's hard to tell if
// a variable is odr-used or not. So best to just cut the probem at the root.
// a variable is odr-used or not. So best to just cut the problem at the root.
static constexpr int size() {
return 32 / sizeof(T);
}

View File

@ -94,7 +94,7 @@ uint64_t CUDAGenerator::current_seed() const {
}
/**
* Gets a nondeterminstic random number from /dev/urandom or time,
* Gets a nondeterministic random number from /dev/urandom or time,
* seeds the CPUGenerator with it and then returns that number.
*
* FIXME: You can move this function to Generator.cpp if the algorithm

View File

@ -53,7 +53,7 @@ namespace at { namespace cuda {
// NOTE [ ATen NVRTC Stub and HIP ]
//
// ATen's NVRTC stub library, caffe2_nvrtc, provides dynamic loading of both
// NVRTC and driver APIs. While the former is not yet suppoted for HIP, the
// NVRTC and driver APIs. While the former is not yet supported for HIP, the
// later is supported and needed (e.g., in CUDAHooks::getDeviceWithPrimaryContext()
// used by tensor.pin_memory()).
//

View File

@ -76,7 +76,7 @@ public:
T* desc() const { return desc_.get(); }
T* desc() { return desc_.get(); }
// Use mut_desc() to access the underlying desciptor pointer
// Use mut_desc() to access the underlying descriptor pointer
// if you intend to modify what it points to (e.g., using
// cudnnSetFooDescriptor). This will ensure that the descriptor
// is initialized. Code in this file will use this function.

View File

@ -27,7 +27,7 @@ namespace c10 { namespace hip {
// HIP occurs; instead, anywhere we see "CUDA", it actually means "HIP".
// For example, when you use HIPified PyTorch, you say x.cuda() to
// move a tensor onto ROCm device. We call this situation "HIP
// maquerading as CUDA".
// masquerading as CUDA".
//
// This leads to a very awkward situation when we want to call c10_hip
// code from PyTorch, since c10_hip is expecting things to be called

View File

@ -61,7 +61,7 @@ public:
T* desc() const { return desc_.get(); }
T* desc() { return desc_.get(); }
// Use mut_desc() to access the underlying desciptor pointer
// Use mut_desc() to access the underlying descriptor pointer
// if you intend to modify what it points to (e.g., using
// miopenSetFooDescriptor). This will ensure that the descriptor
// is initialized. Code in this file will use this function.

View File

@ -1104,7 +1104,7 @@ Tensor _lu_solve_helper_cpu(const Tensor& self, const Tensor& LU_data, const Ten
return self_working_copy;
}
// Supports arbitrary batch dimensions for self and LU_data (implicity LU_pivots also)
// Supports arbitrary batch dimensions for self and LU_data (implicitly LU_pivots also)
Tensor lu_solve(const Tensor& self, const Tensor& LU_data, const Tensor& LU_pivots) {
TORCH_CHECK(self.dim() >= 2,
"b should have at least 2 dimensions, but has ", self.dim(), " dimensions instead");

View File

@ -59,7 +59,7 @@ static inline void multi_margin_loss_cpu_kernel(
using accscalar_t = at::acc_type<scalar_t, false>;
// dim() != 0 check is for 1d input which produces a scalar output (that
// cannot be handeld by TensorAccessor)
// cannot be handled by TensorAccessor)
if (reduction == Reduction::None && output.dim() > 0) {
auto output_acc = output.accessor<scalar_t, 1>();
for (int64_t t = 0; t < nframe; t++) {

View File

@ -295,7 +295,7 @@ static std::vector<QuantizedCellParamsDynamic> gather_quantized_params_dynamic(
}
return result;
#else // USE_FBGEMM
TORCH_INTERNAL_ASSERT(false, "Tried to use quantized RNN wihtout FBGEMM!")
TORCH_INTERNAL_ASSERT(false, "Tried to use quantized RNN without FBGEMM!")
#endif // USE_FBGEMM
}

View File

@ -276,7 +276,7 @@ std::tuple<Tensor, Tensor> kthvalue(
return at::kthvalue(self, k, dimname_to_position(self, dim), keepdim);
}
// this does not reduce to median with dim beause we don't want to copy twice
// this does not reduce to median with dim because we don't want to copy twice
Tensor median_cpu(const Tensor& self) {
NoNamesGuard guard;
TORCH_CHECK(self.numel() > 0, "median cannot be called with empty tensor");

View File

@ -618,7 +618,7 @@ Tensor index_select_sparse(const Tensor& self, int64_t dim, const Tensor& index)
self - sparse tensor, its shape is sizes = sparse_shape + dense_shape
indices - 2-D tensor of indices, shape is (sparse_dims, nnz)
values - (1+len(dense_shape))-D tensor of values, shape is (nnz,) + dense_shape
index_select(dim, index) returns a sparse tensor with the follwing data
index_select(dim, index) returns a sparse tensor with the following data
new_sizes = sizes[:dim] + (n,) + sizes[dim+1:]
new_indices - shape is (sparse_dims, new_nnz)
new_values - shape is (new_nnz,) + dense_shape

View File

@ -85,7 +85,7 @@ static void unfolded3d_copy(
const int64_t input_hw = input_height * input_width;
const int64_t input_dhw = input_hw * input_depth;
// the following variables are updated ouside the most inner loop
// the following variables are updated outside the most inner loop
int64_t d = d_out * dT - pT + i;
int64_t h = h_out * dH - pH + j;
int64_t ofs = nip * input_dhw + d * input_hw + h * input_width;

View File

@ -28,7 +28,7 @@
* are computed from the input and the output size;
*
*
* When the scales are infered from the input and output sizes,
* When the scales are inferred from the input and output sizes,
* we view each pixel as an area, idx + 0.5 as its center index.
* Here is an example formula in 1D case.
* if align_corners: center of two corner pixel areas are preserved,

View File

@ -26,7 +26,7 @@ struct Dist {
// map : This tells how to modify (a - b) to form the component that
// gets summed.
// red : This tells how to sum the result of map up. This is
// separate because the inf norm actuall uses max instead of
// separate because the inf norm actually uses max instead of
// sum.
// finish : This tells what to do with the aggregated value to compute
// the norm. Generally this is the result of val ^ (1 / p).

View File

@ -158,7 +158,7 @@ namespace at { namespace native { namespace {
* `apply_fn` will be called multiple times, and together cover the entire
* output spatial space.
*
* Now you should be able tp understand everything about the implementaion of
* Now you should be able tp understand everything about the implementation of
* 2D forward kernel shown at the beginning of this note.
*
**/

View File

@ -117,7 +117,7 @@ static void copy_kernel_cuda(TensorIterator& iter, bool non_blocking) {
Device dst_device = iter.device(0);
Device src_device = iter.device(1);
// Enable p2p access between devices. (No-op if it invovles the CPU)
// Enable p2p access between devices. (No-op if it involves the CPU)
bool p2p_enabled = maybe_enable_p2p_access(dst_device, src_device);
if (copy_requires_temporaries(iter, p2p_enabled)) {

View File

@ -364,7 +364,7 @@ namespace {
// assuming grad_grid is contiguous
// thus we can
// 1. use index with gGrid_sW to diectly compute gGrid_ptr_NHW
// 1. use index with gGrid_sW to directly compute gGrid_ptr_NHW
// 2. directly assign to gGrid_ptr_NHW[0], gGrid_ptr_NHW[1]
scalar_t *gGrid_ptr_NHW = grad_grid.data + index * gGrid_sW;
gGrid_ptr_NHW[0] = gix_mult * gix;
@ -383,7 +383,7 @@ namespace {
// assuming grad_grid is contiguous
// thus we can
// 1. use index with gGrid_sW to diectly compute gGrid_ptr_NHW
// 1. use index with gGrid_sW to directly compute gGrid_ptr_NHW
// 2. directly assign to gGrid_ptr_NHW[0], gGrid_ptr_NHW[1]
scalar_t *gGrid_ptr_NHW = grad_grid.data + index * gGrid_sW;
gGrid_ptr_NHW[0] = static_cast<scalar_t>(0);
@ -569,7 +569,7 @@ namespace {
// assuming grad_grid is contiguous
// thus we can
// 1. use index with gGrid_sW to diectly compute gGrid_ptr_NDHW
// 1. use index with gGrid_sW to directly compute gGrid_ptr_NDHW
// 2. directly assign to gGrid_ptr_NDHW[0], gGrid_ptr_NDHW[1], gGrid_ptr_NDHW[2]
scalar_t *gGrid_ptr_NDHW = grad_grid.data + index * gGrid_sW;
gGrid_ptr_NDHW[0] = gix_mult * gix;
@ -591,7 +591,7 @@ namespace {
// assuming grad_grid is contiguous
// thus we can
// 1. use index with gGrid_sW to diectly compute gGrid_ptr_NDHW
// 1. use index with gGrid_sW to directly compute gGrid_ptr_NDHW
// 2. directly assign to gGrid_ptr_NDHW[0], gGrid_ptr_NDHW[1], gGrid_ptr_NDHW[2]
scalar_t *gGrid_ptr_NDHW = grad_grid.data + index * gGrid_sW;
gGrid_ptr_NDHW[0] = static_cast<scalar_t>(0);

View File

@ -108,7 +108,7 @@ static Tensor wrapIndexOnce(const Tensor & index, int64_t dim, int64_t dim_size,
}
static std::vector<int64_t> computeLinearStride(const Tensor & tensor) {
// computes the stride as if tensor were contigous
// computes the stride as if tensor were contiguous
auto sizes = tensor.sizes();
std::vector<int64_t> stride(tensor.dim());
stride[tensor.dim() - 1] = 1;

View File

@ -7,7 +7,7 @@
//
// The gpu_kernel_with_scalars generates specializations that support a
// single scalar CPU argument, such as from `cuda_tensor + 5`. The CPU scalar
// is lifted to a kernel paramter instead of copying to device memory.
// is lifted to a kernel parameter instead of copying to device memory.
// This should be used in conjunction with TensorIterator::allow_cpu_scalars_,
// which is the default for TensorIterator::binary_op. Otherwise, all inputs
// and the output must be on the GPU.

View File

@ -51,7 +51,7 @@ __device__ __forceinline__ void warp_reduce(acc_t* sum) {
// A "WARP" contains "C10_WARPS_SIZE" threads, these treads are guaranteed to belong to the same warp.
// This is important because it means only __shfl_ instructions are required for reductions.
// Note that this means WARP_SIZE must be a power of two and <= architecture warp size.
// CUDA warp size is 32 for all existing GPU architecures, but there is no guarantee this will not change for future arch.
// CUDA warp size is 32 for all existing GPU architectures, but there is no guarantee this will not change for future arch.
// ROCm warp size is 64 for all currently ROCm-supported GPU architectures, but this may change for future archs.
// is_log_softmax is a flag indicating whether SoftMax or LogSoftMax should be computed.
// The template can be instantiated with any floating point type for the type arguments input_t, output_t and acc_t.

View File

@ -200,7 +200,7 @@ __global__ void cunn_SpatialSoftMaxForward(
for (uint32_t inner_index = blockIdx.y * blockDim.y + threadIdx.y; inner_index < inner_size; inner_index += blockDim.y * gridDim.y) {
const uint32_t data_offset = outer_offset + inner_index;
////////////////////////////////////////////////////////////
// These two blocks are really eqivalent, but specializing on
// These two blocks are really equivalent, but specializing on
// blockDim.x == 1 makes the kernel faster when it's unused.
// I didn't want to thread an extra template parameter, and nvcc
// seems to be smart enough to hoist the if outside of the loops.

View File

@ -177,7 +177,7 @@ void kthvalue_cuda_template(
AT_CUDA_CHECK(cudaGetLastError());
}
// this does not reduce to median with dim beause we don't want to copy twice
// this does not reduce to median with dim because we don't want to copy twice
template <typename scalar_t>
Tensor median_cuda_template(const Tensor& self) {
TORCH_CHECK(self.numel() > 0, "median cannot be called with empty tensor");

View File

@ -211,7 +211,7 @@ inline int64_t resolve_root_int(
// (row + 2f - 1)row <= 2x
// row^2 + (2f-1)row - 2x <= 0. [3]
//
// Based on ineuqality [3], we have the following coefficients for formula of
// Based on inequality [3], we have the following coefficients for formula of
// root:
// a = 1
// b = 2f - 1
@ -254,7 +254,7 @@ inline void get_coordinate_in_tril_trapezoid(
// (-row + 2f + 1)row <= 2x
// row^2 - (2f+1)row + 2x >= 0. [3]
//
// Based on ineuqality [3], we have the following coefficients for formula of
// Based on inequality [3], we have the following coefficients for formula of
// root:
// a = 1
// b = -1 - 2f

View File

@ -213,7 +213,7 @@ __device__ __forceinline__ static void upsample_increment_value_bounded(
accscalar_t value) {
int access_y = max(min(y, height - 1), 0);
int access_x = max(min(x, width - 1), 0);
/* TODO: result here is trucated to scalar_t,
/* TODO: result here is truncated to scalar_t,
check: https://github.com/pytorch/pytorch/pull/19630#discussion_r281426912
*/
gpuAtomicAdd(

View File

@ -1119,7 +1119,7 @@ std::tuple<Tensor, Tensor> pack_hidden<std::tuple<Tensor, Tensor>>(const Tensor&
struct DropoutState {
// Both buffer and event are lazily instantiated when a dropout state is needed
// for the first time. Note that in this case needed != used, as we don't need
// a bufer to e.g. run RNNs in test mode.
// a buffer to e.g. run RNNs in test mode.
at::Tensor buffer;
c10::optional<cuda::CUDAEvent> event;
std::mutex mutex;

View File

@ -99,7 +99,7 @@ static inline void _fft_fill_with_conjugate_symmetry_slice(Tensor& output,
// 1. if this dim idx becomes 1, will need to add (size - 1) * stride
// 2. otherwise, will need to subtract stride
if (from_slice_indices[d] == 0) {
// Substract. Carries over to previous dimension
// Subtract. Carries over to previous dimension
from_slice_data -= output.stride(d);
} else if (from_slice_indices[d] == 1) {
// Dimension index becomes 1
@ -107,7 +107,7 @@ static inline void _fft_fill_with_conjugate_symmetry_slice(Tensor& output,
from_slice_data += (output.size(d) - 1) * output.stride(d);
break;
} else {
// Substract. Doesn't carry over to previous dimension
// Subtract. Doesn't carry over to previous dimension
from_slice_data -= output.stride(d);
break;
}

View File

@ -43,7 +43,7 @@ using namespace mkldnn;
namespace {
// Helper function for getting an ideep tensor out of an aten Tensor.
// Note in case the aten Tensor is a dense tensor, the retured ideep
// Note in case the aten Tensor is a dense tensor, the returned ideep
// tensor is just a view of the storage of the aten dense tensor, so
// caller needs to make sure the aten dense tensor's lifetime is
// longer than the ideep tensor.

View File

@ -23,7 +23,7 @@ inline int start_index(int out_idx, int out_len, int in_len) {
* in_len: the dimension_size of input matrix
* Basically, in_len / out_len gives the number of
* elements in each average computation.
* This functin computes the start index on input matrix.
* This function computes the start index on input matrix.
*/
return (int)std::floor((float)(out_idx * in_len) / out_len);
}

View File

@ -23,7 +23,7 @@ Tensor quantized_clamp_impl(
qclamp_stub(qx.device().type(), qx, *min, *max, qy);
} else {
TORCH_CHECK(
false, "Both min and max should be specifed for quantized clamp!");
false, "Both min and max should be specified for quantized clamp!");
}
return qy;
}

View File

@ -15,7 +15,7 @@ inline void check_inputs(const Tensor& qa, const Tensor& qb) {
TORCH_CHECK(qa.qscheme() == kPerTensorAffine,
"Only per tensor quantization is supported in Mul.");
TORCH_CHECK(qa.qscheme() == qb.qscheme(),
"Both inputs to Mul must have the same quantization shceme.");
"Both inputs to Mul must have the same quantization scheme.");
TORCH_CHECK(qa.numel() == qb.numel(),
"Mul operands must be the same size!");
TORCH_CHECK(qa.scalar_type() == qb.scalar_type(),

View File

@ -63,7 +63,7 @@ void pytorch_qnnp_requantize_fp32__neon(
#ifdef __aarch64__
/*
* Leverage "Floating-point Convert to Signed integer, rouding to nearest
* Leverage "Floating-point Convert to Signed integer, rounding to nearest
* with ties to even" instruction. This is an ARMv8 instruction (always
* available in AArch64), which saturates result on overflow. We don't need
* to specifically consider saturated results, they will be clamped at the

View File

@ -46,7 +46,7 @@ void pytorch_qnnp_requantize_fp32__psimd(
* - Large int32_t values can't be exactly represented as FP32. We expect
* that conversion instruction would round it to nearest FP32 value with
* ties to even, but Clang documentation for __builtin_convertvector does
* not guaratee that.
* not guarantee that.
* - Product of two FP32 values is generally not exactly representation as
* an FP32 value, and will be rounded to nearest FP32 value with ties to
* even.

View File

@ -91,7 +91,7 @@ void pytorch_qnnp_requantize_precise__scalar_unsigned32(
*
* To avoid full 64-bit shift, we leverage the fact that shift >= 32, and do
* it in two steps:
* - Shift by 32, which can be implemented by extacting the high 32-bit word
* - Shift by 32, which can be implemented by extracting the high 32-bit word
* on 32-bit systems.
* - Shift by (shift - 32), which can be implemented as a 32-bit shift of
* high word of addition result.

View File

@ -11,7 +11,7 @@ struct QnnpackOperatorDeleter {
};
// PackedWeight struct for QNNPACK stores the original Weight and Bias as
// QNNPACK currently does not support an unpack function. Possible optimiation -
// QNNPACK currently does not support an unpack function. Possible optimization -
// For PyTorch Mobile, once the model is scripted and serialized we don't need
// to call unpack, so we can save some memory by checking for this case.
// Input scale is set to null in pre-pack step. QNNPACK needs bias quantized with

View File

@ -61,7 +61,7 @@ Tensor& s_addmm_out_sparse_dense_cuda(Tensor& r_, const Tensor& t, const SparseT
TORCH_CHECK(cuda::check_device({sparse_, r_, t, dense}));
TORCH_CHECK(dense.dim() == 2, "addmm: 2D tensor expected, got ", dense.dim(), "D tensor");
TORCH_CHECK(sparse_.sparse_dim() == 2, "addmm: expected first two dims to be sparse (indices has size 2 at first dim), but got ", sparse_.sparse_dim(), " spase dims");
TORCH_CHECK(sparse_.sparse_dim() == 2, "addmm: expected first two dims to be sparse (indices has size 2 at first dim), but got ", sparse_.sparse_dim(), " sparse dims");
// no need to check dense_dim because dense_dim + sparse_dim = dim
// mxk * kxn = mxn

View File

@ -33,7 +33,7 @@
// we should merge macros.
#ifdef _WIN32
#if !defined(AT_CORE_STATIC_WINDOWS)
// TODO: unfiy the controlling macros.
// TODO: unify the controlling macros.
#if defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
#define TH_CPP_API __declspec(dllexport)
#else // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)

View File

@ -1,4 +1,4 @@
#pragma once
#include <TH/THStorageFunctions.h>
// Compatability header. Use THStorageFunctions.h instead if you need this.
// Compatibility header. Use THStorageFunctions.h instead if you need this.

View File

@ -42,7 +42,7 @@ inline THStorage* THTensor_getStoragePtr(const THTensor* tensor) {
// [NOTE: nDimension vs nDimensionLegacyNoScalars vs nDimensionLegacyAll]
// nDimension corresponds to the "true" ATen dimension.
// nDimensionLegacyNoScalars correpsonds to the ATen dimension, except scalars are viewed as 1-dimensional tensors.
// nDimensionLegacyNoScalars corresponds to the ATen dimension, except scalars are viewed as 1-dimensional tensors.
// nDimensionLegacyAll corresponds to the ATen dimension, except scalars are viewed as 1-dimensional tensors
// and tensors with a dimension of size zero are collapsed to 0-dimensional tensors.
//

View File

@ -197,7 +197,7 @@ void THVector_(normal_fill)(scalar_t *data,
}
/*
* This struct's constructor initalizes the dispatch tables. It simply checks
* This struct's constructor initializes the dispatch tables. It simply checks
* what SIMD extensions are available, and then walks the dispatch table
* to choose the best function.
* NOTE: As implemented, it will initialize the dispatch pointer to the first supported function.

View File

@ -6,7 +6,7 @@
#include <cuda_runtime.h>
#endif
// A utility class to implement integer division by muliplication, given a fixed
// A utility class to implement integer division by multiplication, given a fixed
// divisor.
//
// WARNING: The fast divider algorithm is only implemented for unsigned int;

View File

@ -41,7 +41,7 @@ void THNN_(SpatialDepthwiseConvolution_updateOutput)(
weight = THCTensor_(newContiguous)(state, weight);
bias = bias ? THCTensor_(newContiguous)(state, bias) : bias;
// Following the behvaior of other THCUNN functions, we shape the output
// Following the behavior of other THCUNN functions, we shape the output
// Tensor ourselves
int batchSize = input->size(0);

View File

@ -14,7 +14,7 @@ class C2SimpleNet(object):
"""
This module constructs a net with 'op_name' operator. The net consist
a series of such operator.
It intializes the workspace with input blob equal to the number of parameters
It initializes the workspace with input blob equal to the number of parameters
needed for the op.
Provides forward method to run the net niter times.
"""

View File

@ -37,7 +37,7 @@ List all the supported tests:
$ python -m pt.add_test --list_tests
```
Filter and run a test (use `add_M8_N16_K32` as an exapmle):
Filter and run a test (use `add_M8_N16_K32` as an example):
```
$ python -m pt.add_test --test_name add_K32_M8_N1
--omp_num_threads 1 --mkl_num_threads 1

View File

@ -145,7 +145,7 @@ OpMeta = namedtuple("OpMeta", "op_type num_inputs input_dims input_types \
def generate_c2_test_from_ops(ops_metadata, bench_op, tags):
"""
This function is used to generate Caffe2 tests based on the meatdata
This function is used to generate Caffe2 tests based on the metadata
of operators. The metadata includes seven fields which are 1) op_type:
the name of the operator. 2) num_inputs: the number of input blobs.
3) input_dims: a dictionary which includes the shapes of the input blobs.

View File

@ -93,7 +93,7 @@ def _build_test(configs, bench_op, OperatorTestCase, run_backward, op_name_funct
tags = attr["tags"]
continue
# if 'cuda' is sepcified in input shape but the testing machines doesn't
# if 'cuda' is specified in input shape but the testing machines doesn't
# support, we will skip this input
if 'cuda' in attr.values():
if not torch.cuda.is_available():

View File

@ -52,7 +52,7 @@ class LSTMBenchmark(op_bench.TorchBenchmarkBase):
self.x = torch.randn(sequence_len, # sequence length
batch_size, # batch size
I) # Number of featues in X
I) # Number of features in X
self.h = torch.randn(NL * (D + 1), # layer_num * dir_num
batch_size, # batch size
H) # hidden size

View File

@ -62,7 +62,7 @@ C10_DEFINE_string(
"Report the conversion stage time to screen. "
"The format of the string is <type>|<identifier>. "
"The valid type is 'json'. "
"The valid identifier is nothing or an identifer that prefix every line");
"The valid identifier is nothing or an identifier that prefix every line");
C10_DEFINE_string(
scale,
"-1,-1",

View File

@ -63,7 +63,7 @@ C10_DEFINE_string(
"Report the conversion stage time to screen. "
"The format of the string is <type>|<identifier>. "
"The valid type is 'json'. "
"The valid identifier is nothing or an identifer that prefix every line");
"The valid identifier is nothing or an identifier that prefix every line");
C10_DEFINE_string(
scale,
"-1,-1",

View File

@ -203,7 +203,7 @@ struct C10_API StorageImpl final : public c10::intrusive_ptr_target {
data_ptr_ = std::move(data_ptr);
// NOTE: data_type might change and so it's also possible that capacity
// might not be divisible by itemsize. There is no way for us to keep track
// of the exact capacity if we're not explicity storing is. More conrectely
// of the exact capacity if we're not explicitly storing is. More concretely
// capacity() might not return the value that was set here, if itemsize does
// not evenly divide it.
numel_ = capacity / data_type_.itemsize();

View File

@ -5,7 +5,7 @@
#include <c10/util/Registry.h>
// Note: we use a different namespace to test if the macros defined in
// Registry.h actuall works with a different namespace from c10.
// Registry.h actually works with a different namespace from c10.
namespace c10_test {
class Foo {

View File

@ -10,7 +10,7 @@
namespace {
/**
* This is a helper function which attemtps to get a base value depending on the
* This is a helper function which attempts to get a base value depending on the
* # of nodes. Larger the base the better performance (up to 4) is what we have
* observed in gloo benchmarks. At the moment bcube works only if # nodes = base
* ^ x. Where x is some constant. So, if # node don't match our expectation

View File

@ -63,7 +63,7 @@ class AllreduceOp final : public Operator<Context> {
// Store which inputs/outputs this instance initialized with
update(init_);
// Verify inputs == ouputs
// Verify inputs == outputs
CAFFE_ENFORCE_EQ(init_.inputs.size(), init_.outputs.size());
for (auto i = 0; i < init_.inputs.size(); i++) {
CAFFE_ENFORCE_EQ(init_.inputs[i], init_.outputs[i]);

View File

@ -37,7 +37,7 @@ std::unique_ptr<::gloo::Algorithm> initializeAlgorithm(
}
/**
* This is a helper function which attemtps to get a base value depending on the
* This is a helper function which attempts to get a base value depending on the
* # of nodes. Larger the base the better performance (up to 4) is what we have
* observed in gloo benchmarks. At the moment bcube works only if # nodes = base
* ^ x. Where x is some constant. So, if # node don't match our expectation

View File

@ -58,7 +58,7 @@ class BroadcastOp final : public Operator<Context> {
// Store which inputs/outputs this instance initialized with
update(init_);
// Verify inputs == ouputs
// Verify inputs == outputs
CAFFE_ENFORCE_EQ(init_.inputs.size(), init_.outputs.size());
for (auto i = 0; i < init_.inputs.size(); i++) {
CAFFE_ENFORCE_EQ(init_.inputs[i], init_.outputs[i]);

View File

@ -73,7 +73,7 @@ class ReduceScatterOp final : public Operator<Context> {
// Store which inputs/outputs this instance initialized with
update(init_);
// Verify inputs == ouputs
// Verify inputs == outputs
CAFFE_ENFORCE_EQ(init_.inputs.size(), init_.outputs.size());
for (auto i = 0; i < init_.inputs.size(); i++) {
CAFFE_ENFORCE_EQ(init_.inputs[i], init_.outputs[i]);

View File

@ -68,7 +68,7 @@
* The following example shows a general use case for the C++
* bindings, including support for the optional exception feature and
* also the supplied vector and string classes, see following sections for
* decriptions of these features.
* descriptions of these features.
*
* \code
* #define __CL_ENABLE_EXCEPTIONS

View File

@ -108,7 +108,7 @@ def broadcast_parameters(opts, model, num_xpus, broadcast_computed_param=False):
else caffe2_pb2.CPU
for params in all_params:
assert len(params) % num_xpus == 0, \
"Current model dosen't match device number when loading checkpoint"
"Current model doesn't match device number when loading checkpoint"
params_per_xpu = int(len(params) / num_xpus)
for idx in range(params_per_xpu):
blobs = [param for param in params[idx::params_per_xpu]]

View File

@ -507,7 +507,7 @@ void TensorRTTransformer::Transform(
return true;
};
// function to convert runnbale subgraph into a trt op. Note that to keep the
// function to convert runnable subgraph into a trt op. Note that to keep the
// interface clean, we do the double conversion from C2 op to Onnx ops here
// but it should be OK as the cost is really small. We also need to keep the
// same exporter throughout the process to avoid duplicated dummy name

View File

@ -38,7 +38,7 @@ CAFFE2_API void SerializeBlob(
/**
* @brief Convenience function to serialize a blob to a string.
*
* This is a conveinence function to serialize small Blobs that produce
* This is a convenience function to serialize small Blobs that produce
* manageable serialized strings. To serialize big blobs such as
* large sparse tensors, use the fully-functional interface in
* blob_serializer_base.h.

View File

@ -92,7 +92,7 @@ inline Dst dynamic_cast_if_rtti(Src ptr) {
}
// SkipIndices are used in operator_fallback_gpu.h and operator_fallback_mkl.h
// as utilty functions that marks input / output indices to skip when we use a
// as utility functions that marks input / output indices to skip when we use a
// CPU operator as the fallback of GPU/MKL operator option.
template <int... values>
class SkipIndices {

View File

@ -174,7 +174,7 @@ std::unique_ptr<cub::CachingDeviceAllocator> g_cub_allocator;
static std::unordered_map<void*, uint8_t> g_cuda_device_affiliation;
// Data structures for optional memory tracking. Access to these structures
// is garded by the CUDAContext::mutex.
// is guarded by the CUDAContext::mutex.
static std::unordered_map<void*, long> g_size_map;
static std::vector<long> g_total_by_gpu_map(C10_COMPILE_TIME_MAX_GPUS, 0);
static std::vector<long> g_max_by_gpu_map(C10_COMPILE_TIME_MAX_GPUS, 0);

View File

@ -471,7 +471,7 @@ class ComputeBlobRecyclingForDag {
}
}
// Rturns true if the op that generates that blob acquires all tokens.
// Returns true if the op that generates that blob acquires all tokens.
inline bool can_use_blob(
const string& blob_name,
std::unordered_set<int>* tokens,

View File

@ -76,7 +76,7 @@ class CAFFE2_API NetBase : public Observable<NetBase> {
* seconds spent during the benchmark. The 0-th item is the time spent per
* each network run, and if a net instantiation supports run_individual,
* the remainder of the vector returns the number of milliseconds spent per
* opeartor.
* operator.
*/
virtual vector<float> TEST_Benchmark(
const int /*warmup_runs*/,

View File

@ -461,7 +461,7 @@ std::shared_ptr<Tracer> create(
const std::string& net_name) {
// Enable the tracer if the net has the "enable_tracing" argument set OR
// if the command line option includes the net name option in the list of
// tracable nets.
// traceable nets.
bool trace_net = hasEnableTracingFlag(net) || isTraceableNetName(net_name);
return trace_net
? std::make_shared<Tracer>(net, net_name, getTracingConfigFromNet(net))

View File

@ -24,7 +24,7 @@ SimpleRefCountNet::SimpleRefCountNet(
std::map<string, int> last_consumed_at;
std::set<string> created_by_me;
// For each opeartor
// For each operator
for (int idx = 0; idx < net_def->op_size(); ++idx) {
const auto& op_def = net_def->op(idx);
for (const string& in_name : op_def.input()) {

View File

@ -254,7 +254,7 @@ struct CAFFE2_API NNModule {
NNModule(NNModule&&) = default;
NNModule() {}
/* Repalce subgraph sg by node, using the order of
/* Replace subgraph sg by node, using the order of
* node_inputs and node_outputs to determine how to link
* them to the node. node_inputs *must* enumerate all the
* inputs to the subgraph (NeuralNetData that do not

View File

@ -645,7 +645,7 @@ class CAFFE2_API OperatorBase : public Observable<OperatorBase> {
std::string type_;
vector<const Blob*> inputs_;
vector<Blob*> outputs_;
// Preferrably use c10::optional, but nvcc doesn't work
// Preferably use c10::optional, but nvcc doesn't work
#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
std::unique_ptr<const c10::FunctionSchema> fn_schema_;
vector<c10::IValue> newstyle_inputs_;

View File

@ -131,7 +131,7 @@ class CAFFE2_API OpSchema {
OpSchema& AllowInplace(std::function<bool(int, int)> inplace);
OpSchema& AllowInplace(set<std::pair<int, int>> inplace);
OpSchema& AllowOneToOneInplace();
// Sets the rule to enforce in-place opeartion.
// Sets the rule to enforce in-place operation.
OpSchema& EnforceInplace(std::function<bool(int, int)> inplace);
OpSchema& EnforceInplace(set<std::pair<int, int>> inplace);
OpSchema& EnforceOneToOneInplace();

View File

@ -112,7 +112,7 @@ using ScopeGuardImplDecay = ScopeGuardImpl<typename std::decay<F>::type>;
/**
* ScopeGuard is a general implementation of the "Initialization is
* Resource Acquisition" idiom. Basically, it guarantees that a function
* is executed upon leaving the currrent scope unless otherwise told.
* is executed upon leaving the current scope unless otherwise told.
*
* The MakeGuard() function is used to create a new ScopeGuard object.
* It can be instantiated with a lambda function, a std::function<void()>,

View File

@ -32,7 +32,7 @@
#define CAFFE_SDT_ARGSIZE(x) (CAFFE_SDT_ISARRAY(x) ? sizeof(void*) : sizeof(x))
// Format of each probe arguments as operand.
// Size of the arugment tagged with CAFFE_SDT_Sn, with "n" constraint.
// Size of the argument tagged with CAFFE_SDT_Sn, with "n" constraint.
// Value of the argument tagged with CAFFE_SDT_An, with configured constraint.
#define CAFFE_SDT_ARG(n, x) \
[CAFFE_SDT_S##n] "n" ((size_t)CAFFE_SDT_ARGSIZE(x)), \

View File

@ -278,7 +278,7 @@ class CAFFE2_API Workspace {
ShouldContinue should_continue = StopOnSignal{});
/*
* Returns a CPU threadpool instace for parallel execution of
* Returns a CPU threadpool instance for parallel execution of
* work. The threadpool is created lazily; if no operators use it,
* then no threadpool will be created.
*/

View File

@ -31,7 +31,7 @@ import caffe2.python.models.resnet as resnet
'''
Simple benchmark that creates a data-parallel resnet-50 model
and measurs the time.
and measures the time.
'''

View File

@ -1023,7 +1023,7 @@ void TransformImage(
ColorNormalization<Context>(image_data, crop, channels, mean, std);
}
// Only crop / transose the image
// Only crop / transpose the image
// leave in uint8_t dataType
template <class Context>
void CropTransposeImage(

View File

@ -68,7 +68,7 @@
* The following example shows a general use case for the C++
* bindings, including support for the optional exception feature and
* also the supplied vector and string classes, see following sections for
* decriptions of these features.
* descriptions of these features.
*
* \code
* #define __CL_ENABLE_EXCEPTIONS

View File

@ -56,7 +56,7 @@ bool NNApi::run(const TensorVector& inputs, TensorVector* outputs) {
try {
init(inputs, outputs);
} catch (const std::exception& e) {
LOG(ERROR) << "Error duing model initialization: " << e.what();
LOG(ERROR) << "Error during model initialization: " << e.what();
return false;
}

View File

@ -1657,7 +1657,7 @@ Caffe2BackendRep* Caffe2Backend::Prepare(
}
}
// TODO: avoid extra copy by directly feed initialiers to backend blobs
// TODO: avoid extra copy by directly feed initializers to backend blobs
OnnxToCaffe2(
&rep->init_net(),
&rep->pred_net(),

View File

@ -185,7 +185,7 @@ void ssaRewriteForIfOp(
OperatorDef* op,
std::unordered_map<std::string, int>* blob_versions,
std::set<std::string>* is_initialized_tensor) {
// Get all the "external" inputs and outpus of the subnet
// Get all the "external" inputs and outputs of the subnet
// Since then_net and else_net has same external input/output, we only collect
// external input/output from one of its subnet And perform the rewrite to
// both then_net and else_net

View File

@ -111,7 +111,7 @@ class CAFFE2_API OnnxExporter {
const caffe2::OperatorDef& def,
const std::unordered_map<std::string, caffe2::TensorShape>& shapes);
// \brief Check black listed arguemnts where we won't pass down when
// \brief Check black listed arguments where we won't pass down when
// converting to ONNX node
bool IsBlackListed(const caffe2::Argument& arg);

View File

@ -138,7 +138,7 @@ ONNX_PYTORCH_OPERATOR_SET_SCHEMA(
OpSchema()
.SetDoc("Mirror Caffe2 BatchMatMul operator")
.Input(0, "X", "tensor of shape (dim0, dim1 ... M, K)", "T")
.Input(1, "Y", "tensor of shpae (dim0, dim2 ... K, N)", "T")
.Input(1, "Y", "tensor of shape (dim0, dim2 ... K, N)", "T")
.Output(0, "Z", "tensor of shape (dim0, dim1 ... M, N)", "T")
.TypeConstraint(
"T",

View File

@ -31,7 +31,7 @@ class CuDNNActivationOpBase : public Operator<CUDAContext> {
const cudnnDataType_t data_type,
const int data_size) {
if (data_size != input_size_) {
// Since the best performance is obtained when the tesor is HW-packed, we
// Since the best performance is obtained when the tensor is HW-packed, we
// put X.size() to W.
input_size_ = data_size;
CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(

View File

@ -69,7 +69,7 @@ The lengths is a 1D tensor that splits the following 'boundaries' argument.
The boundaries is a 1D tensor containing the border list for each feature.
With in each batch, `indices` should not have duplicate number,
and the number of elements in `indices` should be less than or euqal to `D`.
and the number of elements in `indices` should be less than or equal to `D`.
Each element in `lengths` vector (lengths[`i`]) represents
the number of boundaries in the sub border list.
The sum of all elements in `lengths` must be equal to the size of `boundaries`.

View File

@ -126,7 +126,7 @@ OPERATOR_SCHEMA(BatchMatMul)
Batch Matrix multiplication Yi = Ai * Bi, where A has shape (dim0, dim1, ... M, K),
B has shape (dim0, dim1, ... K, N), Y has shape (dim0, dim1, ... M, N) and i ranges
from 0 to (dim0 * dim1 ...) - 1. rank(A) == rank(B) >= 2. In case of A and B being
two diemnsional, it behaves like normal matrix multiplication.
two dimensional, it behaves like normal matrix multiplication.
)DOC")
.Input(0, "A", "tensor of shape (dim0, dim1 ... M, K)")
.Input(1, "B", "tensor of shape (dim0, dim1 ... K, N)")

View File

@ -46,7 +46,7 @@ OPERATOR_SCHEMA(BisectPercentile)
R_2 = [0.3, 1.2];
We will build R = [0.1, 0.4, 0.5, 0.3, 1.2]; besides, we have
lengths = [3, 2]
to indicate the boundries of the percentile information.
to indicate the boundaries of the percentile information.
)DOC")
.Arg(

View File

@ -51,7 +51,7 @@ class BoxWithNMSLimitOp final : public Operator<Context> {
"Unexpected soft_nms_method");
soft_nms_method_ = (soft_nms_method_str_ == "linear") ? 1 : 2;
// When input `boxes` doesn't inlcude background class, the score will skip
// When input `boxes` doesn't include background class, the score will skip
// background class and start with foreground classes directly, and put the
// background class in the end, i.e. score[:, 0:NUM_CLASSES-1] represents
// foreground classes and score[:,NUM_CLASSES] represents background class.

View File

@ -97,7 +97,7 @@ class CudnnConvOpBase : public ConvPoolOpBase<CUDAContext> {
}
protected:
// A helper function to set up the tensor Nd desriptor, depending on the order
// A helper function to set up the tensor Nd descriptor, depending on the order
// the group and the type given.
template <typename T>
void SetTensorNdDescriptorWithGroup(

View File

@ -209,7 +209,7 @@ OPERATOR_SCHEMA(SwapBestPath)
.NumInputs(2)
.NumOutputs(1)
.SetDoc(R"DOC(
Given a sequence of idices and a matrix, enforce that these indices have the
Given a sequence of indices and a matrix, enforce that these indices have the
best columnwise scores
score
)DOC")

View File

@ -170,7 +170,7 @@ In Advances in Neural Information Processing Systems, pp. 1508-1518. 2017.
)DOC")
.Input(0, "input", "Float32 input data")
.Output(0, "output", "Fused bitwidth, tail, min, max and quantized data")
.Arg("bitwidth", "How many bits to quantiz per data (defaults to 8).")
.Arg("bitwidth", "How many bits to quantize per data (defaults to 8).")
.Arg("random", "random or not (True). False is set up for unittest.");
NO_GRADIENT(FloatToFusedRandRowwiseQuantized);

View File

@ -184,7 +184,7 @@ class GatherOp : public Operator<Context> {
// an error.
// Right now, we apply index wrapping by default only to axis == 0,
// since we have ONNX conversion code that uses it. For other ops it
// needs to be speified explicitly with argument or you don't get it.
// needs to be specified explicitly with argument or you don't get it.
if (OperatorBase::HasArgument("wrap_indices")) {
wrap_indices_ = Operator<Context>::template GetSingleArgument<bool>(
"wrap_indices", (false));

View File

@ -69,7 +69,7 @@ CAFFE2_API ERArrXXf ComputeSortedAnchors(
} // namespace utils
// C++ implementation of GenerateProposalsOp
// Generate bounding box proposals for Faster RCNN. The propoasls are generated
// Generate bounding box proposals for Faster RCNN. The proposals are generated
// for a list of images based on image score 'score', bounding box
// regression result 'deltas' as well as predefined bounding box shapes
// 'anchors'. Greedy non-maximum suppression is applied to generate the

View File

@ -632,7 +632,7 @@ search tree.
.Arg("topN", "Number of nodes in outputs")
.Input(0, "X", "Input data from previous layer")
.Input(1, "W", "The matrix trained from Softmax Ops")
.Input(2, "b", "The bias traiend from Softmax Ops")
.Input(2, "b", "The bias trained from Softmax Ops")
.Output(
0,
"Y_names",

View File

@ -140,7 +140,7 @@ bool HeatmapMaxKeypointOp<float, CPUContext>::RunOnDevice() {
}
assert(std::abs(delta(0)) <= MAX_DELTA);
assert(std::abs(delta(1)) <= MAX_DELTA);
// find maximum of detla scores
// find maximum of delta scores
keypoints(k, 0 * keypoint_count + j) =
x0 + (0.5 + maxX + delta(0)) * xLen / heatmap_size;
keypoints(k, 1 * keypoint_count + j) =

View File

@ -74,7 +74,7 @@ class SparseLengths8BitsRowwiseOp : public Operator<Context> {
in_block_size,
outputSize,
indices_size,
N, // embeding table length
N, // embedding table length
input_data,
indices,
lengths,

View File

@ -27,7 +27,7 @@ void ProcessBlob(
auto& blob_states = *blob_states_ptr;
if (blob_states.count(key) == 0) {
// We reset the blob so that any existing content is destroyed. This
// is to guaranee correct device placement: if we are deserializing
// is to guarantee correct device placement: if we are deserializing
// into a TensorCUDA, without explicit Reset we might be loading data
// into an existing TensorCUDA that has pre-allocated memory on a
// different GPU.

View File

@ -46,7 +46,7 @@ inline void LogCuDNNPerfStats(
// Easier indexing into force_algo_ vector,
// shared by CudnnConvTransposeOpBase and CudnnConvOpBase to force
// usage of a particular algortihm instead of searching
// usage of a particular algorithm instead of searching
enum { ALGO_FWD = 0, ALGO_WGRAD = 1, ALGO_DGRAD = 2 };
} // namespace caffe2

Some files were not shown because too many files have changed in this diff.