[BE][8/16] fix typos in torch/ (torch/csrc/jit/) (#156318)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156318
Approved by: https://github.com/albanD
Xuehai Pan
2025-07-03 02:11:53 +08:00
committed by PyTorch MergeBot
parent c0e155a8d2
commit 541584d22e
45 changed files with 76 additions and 77 deletions

View File

@ -1177,7 +1177,6 @@ exclude_patterns = [
'torch/distributed/tensor/**',
'torch/[j-o]*/**',
'torch/utils/**',
'torch/csrc/jit/**',
'torch/csrc/jit/[a-o]*/**',
]
init_command = [

View File

@ -16,7 +16,7 @@ nin
nout
NowNs
optins
OT
ot
overrideable
padD
ptd

View File

@ -958,7 +958,7 @@ torch._C._jit_set_fusion_strategy([
])
```
This will make two attempts to generate static-shape graphs, and after that fall back to generating dynamic-shape graphs. If for some reason compilation keeps occuring (even with dynamic-shape graphs - e.g. this could happen if ranks or dtypes vary), after 20 compilation attempts the graph executor will fall back to running the graph without any attempts to compile it.
This will make two attempts to generate static-shape graphs, and after that fall back to generating dynamic-shape graphs. If for some reason compilation keeps occurring (even with dynamic-shape graphs - e.g. this could happen if ranks or dtypes vary), after 20 compilation attempts the graph executor will fall back to running the graph without any attempts to compile it.
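As a concrete sketch of the strategy list described above (the pair values simply mirror the two static and twenty dynamic attempts mentioned in the text; this snippet is editorial and not part of the diff):

```python
import torch

# Two specializations on static shapes, then up to 20 dynamic-shape
# compilations; after that the executor runs the graph without compiling.
torch._C._jit_set_fusion_strategy([("STATIC", 2), ("DYNAMIC", 20)])
```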
### Pre-derivative Optimization ###

View File

@ -26,5 +26,5 @@ A brief summary of the source tree:
**Refer** to each folder for more in-depth documentation.
Other relevant parts of the codebase not contained here:
- [aten/src/ATen/core](../../../aten/src/ATen/core): contains JIT code re-used by other elements of the
- [aten/src/ATen/core](../../../aten/src/ATen/core): contains JIT code reused by other elements of the
runtime system (eager, mobile, etc.)

View File

@ -319,7 +319,7 @@ static void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
}
static bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
// Cutoff chosed by benchmarking on a TITAN V
// Cutoff chose by benchmarking on a TITAN V
return other_side_input.numel() <= 1024 * 2048;
}
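As an aside on the cutoff in this hunk, a quick back-of-the-envelope check of its magnitude (my own arithmetic, not part of the change):

```python
import torch

# 1024 * 2048 = 2_097_152 elements, i.e. 8 MiB at float32. Tensors at or
# below this size count as "fast" for the non-matmul side of the tree reduce.
other_side_input = torch.randn(1024, 2048)
assert other_side_input.numel() == 1024 * 2048 == 2_097_152
assert other_side_input.numel() * 4 == 8 * 1024 * 1024  # bytes at float32
```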

View File

@ -96,7 +96,7 @@ static bool isBefore(Node* n1, Node* n2) {
}
}
// Now they are the same numer of blocks from the graph block,
// Now they are the same number of blocks from the graph block,
// recurse upwards, checking if they are on the same block
while (true) {
if (n1->owningBlock() == n2->owningBlock()) {

View File

@ -98,7 +98,7 @@ void InplaceMKLDNNSubgraph(const std::shared_ptr<Graph>& graph) {
// This function first calculates aliasing sets,
// then calculates the last node each aliasing set is alive for.
// Then we go through each node, if it's a node which has an equivalent
// inplace node and the aliasing set for its input is dead afer this node, we
// inplace node and the aliasing set for its input is dead after this node, we
// inplace it. Then we merge the aliasing sets for the input and output of the
// node and extend the liveness of the set. To inplace a node you need to
// prove device and dtype of the input and output are the same, which we've
@ -812,7 +812,7 @@ void ComputeSubgraphInMKLDNN(Node* subgraph_node) {
if (body_node->kind() == aten::conv2d ||
body_node->kind() == aten::conv3d) {
// this node doesnt handle string padding yet...
// this node doesn't handle string padding yet...
if (!body_node->namedInput("padding")->type()->cast<StringType>()) {
body_node->replaceWithNewSymbol(Symbol::prim("mkldnn_convolution"));
body_node->destroy();

View File

@ -167,7 +167,7 @@ std::shared_ptr<Graph> ToONNX(
ConstantValueMap::ClearMaps();
auto new_graph = std::make_shared<Graph>(graph->current_scope());
py::dict env;
// Kept identical to values in env. Used for constant-time existance check.
// Kept identical to values in env. Used for constant-time existence check.
py::set values_in_env;
try {
BlockToONNX(

View File

@ -17,7 +17,7 @@ namespace torch::jit {
// information. Shape and type information is only available after
// _jit_pass_onnx, which converts aten nodes to onnx nodes. So there is a
// interdependent issue. _jit_pass_onnx depends on preprocess passes to convert
// aten nodes into convertable condition, and preprocess passes depend on
// aten nodes into convertible condition, and preprocess passes depend on
// _jit_pass_onnx to convert upstream nodes and apply onnx shape inference.
// Separating the pass into two parts breaks the interdependency.
//

View File

@ -116,7 +116,7 @@ static std::vector<std::string> _single_input_general_shape_aten_funcs = {
"__getitem__",
};
// Theses are prim::CallFunctions for ops that doesn't require observation and
// These are prim::CallFunctions for ops that doesn't require observation and
// have a single input Tensor
// Also these ops do computation on the value of Tensor
// TODO: [Need verify] looks like we can quantize simple functionals that just
@ -136,7 +136,7 @@ static std::vector<std::string> _single_input_general_value_call_funcs = {
"leaky_relu",
};
// Theses are aten functions for ops that doesn't require observation and
// These are aten functions for ops that doesn't require observation and
// have a single input Tensor
// Also these ops do computation on the value of Tensor
// e.g. `aten::avg_pool2d(%input_tensor, ...)`

View File

@ -1702,7 +1702,7 @@ Module InsertObserversForOnDevicePTQ(
// you will have multiple getattrs for the same attribute and thus potentially
// multiple observers observing the same value. This will also lead to
// increased size of the packed param struct. I dont expect this to be a
// common pattern but something to be aware fo Note that current quant
// common pattern but something to be aware of Note that current quant
// workflow does not prevent this anyway since during inset quant dequant
// things are inlined anyway
helper.fillBoundaryValueMap(cloned_module, observer_method_name);

View File

@ -1622,7 +1622,7 @@ void InsertQuantDeQuantHelper::insertCalculateQParamsAndQuantizationOps(
void InsertQuantDeQuantHelper::runForOnDevicePTQ(
Module& module,
const std::string& method_name) {
// In all likelihood this really wont do anything because we expect that
// In all likelihood this really won't do anything because we expect that
// the input method for quantization's prepare step will be inlined. Thus
// only call methods we will see will belong to observer's forward calls.
for (auto& invoked_methods : getInvokedMethods(module, method_name)) {
@ -1834,8 +1834,8 @@ Module InsertQuantDeQuantOnDevicePTQ(
// ReplicateChooseQParamsQuantDequant: This is propagating dynamic quant's
// quant dequant RemoveRedundantQuantizationOps: THis is removing activation
// observers for dynamic quant when the op related to it is not dynamically
// quantizable. Doesnt really make sense. In our case we wont have those
// anyway since for dynamic quant activations wont be observed We can still
// quantizable. Doesn't really make sense. In our case we won't have those
// anyway since for dynamic quant activations won't be observed We can still
// use this function because the above two methods should really be a noop
h.propagateQuantizationOps(module);
return module;

View File

@ -206,7 +206,7 @@ QuantFusionInfo getFixedQParamOpFusionInfo(
%r = )";
op_pattern += op_name + "(" + "%a_dequant" + extra_op_arg_list + ")";
// IR pattern common to all ops with fixed quantization parameters for
// asymetric quantization
// asymmetric quantization
std::string asym_fixed_qparam_op_suffix = R"(
%r_scale : float = prim::Constant[value=0.00390625]()
%r_zero_point : int = prim::Constant[value=0]()
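An aside on the constant in this pattern: 0.00390625 is 1/256, which together with zero_point 0 spans [0, 255/256] in quint8, a natural fit for fixed-qparam ops whose outputs lie in [0, 1). A hedged sketch (the choice of sigmoid here is illustrative, not taken from the diff):

```python
import torch

x = torch.randn(4)
scale, zero_point = 1 / 256, 0  # 1/256 == 0.00390625, the constant in the IR pattern above
q = torch.quantize_per_tensor(torch.sigmoid(x), scale, zero_point, torch.quint8)
print(q.int_repr())     # quantized integer codes
print(q.dequantize())   # their reconstructed float values
```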

View File

@ -8,7 +8,7 @@ namespace torch::jit {
struct TORCH_API CanonicalizedSymbolicShape {
// TODO: Consider in the future if it is reasonable to
// merge code with SymbolicShape or VaryingShape while keeping
// the two not implicitly convertable (and cause bugs).
// the two not implicitly convertible (and cause bugs).
CanonicalizedSymbolicShape(
const c10::SymbolicShape& orig_shape,
std::unordered_map<int64_t, int64_t>& ss_map) {

View File

@ -396,7 +396,7 @@ void insertTypeGuard(
namespace {
bool has_unsupported_pin_memory(const Node* node) {
// cant support non-constant pin_memory or pin_memory = True
// can't support non-constant pin_memory or pin_memory = True
if (auto maybe_index = node->schema().argumentIndexWithName("pin_memory")) {
int index = *maybe_index;
auto inp = node->input(index);

View File

@ -66,7 +66,7 @@ TORCH_API bool isSupported(Node* node);
/// work with dynamic shapes unless explicitly register the shape function via
/// `torch::jit::RegisterShapeComputeGraphForSchema` for the custom operator.
///
/// @return Reference of the custome operator set
/// @return Reference of the custom operator set
///
TORCH_API OperatorSet& getCustomOperatorSet();

View File

@ -62,7 +62,7 @@ struct ValueMapper {
auto new_outputs = merged_node->outputs();
for (Value* v : new_outputs) {
auto maybe_last_use = firstOrLastUse(v, /*find_first*/ false);
// if it doesnt have a use it shouldnt have been added as output
// if it doesn't have a use it shouldn't have been added as output
TORCH_INTERNAL_ASSERT(maybe_last_use);
const Use last_use = *maybe_last_use;

View File

@ -2316,7 +2316,7 @@ void initJITBindings(PyObject* module) {
// Throw errors when calling wait() on the returned Future if
// any of the original futures would throw.
// NB: PythonFutureWrapper takes an unwrap_func which serves as a
// callback to evalute the value in the Future. RPC uses this
// callback to evaluate the value in the Future. RPC uses this
// unwrap_func to check whether the returned py::object is a
// RemoteException object, and re-throw the exception if it is.
// By extracting the c10::ivalue::Future from PythonFutureWrapper

View File

@ -809,7 +809,7 @@ std::pair<std::shared_ptr<Operator>, Stack> getOpWithStack(
}
// This function is used to check if the schema is valid for the given args and
// kwargs. It checks script object by checking wether the FakeScriptObject is
// kwargs. It checks script object by checking whether the FakeScriptObject is
// an instance of the corresponding fake class for the actual class used in
// schema.
bool checkSchemaAllowFakeScriptObject(

View File

@ -649,7 +649,7 @@ inline InferredType tryToInferContainerType(
"."));
} else {
// TODO: this message is not correct anymore, since this InferredType is
// used from a bunch of circumstances unrelated to tracing. We can re-use
// used from a bunch of circumstances unrelated to tracing. We can reuse
// this instead of the attribute_failure stuff in concreteType
return InferredType(c10::str(
"Only tensors and (possibly nested) tuples of tensors, lists, or dicts ",

View File

@ -99,7 +99,7 @@ struct C10_EXPORT ConcretePyObjectHolder final : PyObjectHolder {
py_obj_.ptr() = nullptr;
}
// explicit construction to avoid errornous implicit conversion and
// explicit construction to avoid erroneous implicit conversion and
// copy-initialization
explicit ConcretePyObjectHolder(py::object py_obj)
: py_obj_(std::move(py_obj)) {}

View File

@ -1223,7 +1223,7 @@ std::shared_ptr<SugaredValue> toSugaredValue(
obj.ptr() == py::module::import("torch.jit").attr("isinstance").ptr()) {
return SpecialFormValue::create(prim::isinstance);
#ifdef USE_RPC
// RPC module is only avaialble when build flag "USE_DISTRIBUTED" is on.
// RPC module is only available when build flag "USE_DISTRIBUTED" is on.
} else if (
isRpcAvailable &&
obj.ptr() ==
@ -1236,7 +1236,7 @@ std::shared_ptr<SugaredValue> toSugaredValue(
return SpecialFormValue::create(prim::rpc_sync);
} else if (
isRpcAvailable &&
// RPC module is only avaialble when build flag "USE_DISTRIBUTED" is on.
// RPC module is only available when build flag "USE_DISTRIBUTED" is on.
obj.ptr() ==
py::module::import("torch.distributed.rpc").attr("remote").ptr()) {
return SpecialFormValue::create(prim::rpc_remote);

View File

@ -68,7 +68,7 @@ struct VISIBILITY_HIDDEN PythonValue : public SugaredValue {
ErrorReport(loc)
<< kind() << " cannot be used as a value. "
<< "Perhaps it is a closed over global variable? If so, please "
<< "consider passing it in as an argument or use a local varible "
<< "consider passing it in as an argument or use a local variable "
<< "instead.");
}

View File

@ -89,7 +89,7 @@ std::pair<std::shared_ptr<Graph>, Stack> createGraphByTracingWithDict(
};
// The argument_names parameter is parsed in python and its order
// is the same as the arguments' decalaration order in forward() method.
// is the same as the arguments' declaration order in forward() method.
// These name shall be added to the graph as debug name and the order
// should align with the traceable stack we generated by the python dict.
std::vector<std::string> compact_argument_names;
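The dict-driven tracing path this code backs is reachable from Python through torch.jit.trace's example_kwarg_inputs argument in recent releases; a minimal sketch (the module and shapes are invented for illustration):

```python
import torch

class M(torch.nn.Module):
    def forward(self, x, y):
        return x + y

m = M()
# Keyword inputs are matched to forward()'s declared argument order,
# which is what the argument_names bookkeeping above preserves.
traced = torch.jit.trace(
    m, example_kwarg_inputs={"x": torch.randn(2), "y": torch.randn(2)}
)
print(traced.graph)
```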

View File

@ -55,7 +55,7 @@ C10_DEFINE_bool(
C10_DEFINE_bool(
torch_jit_enable_expanded_stacks,
false,
"When true we will attemps to pre-expand node stacks and cache expanded stacks.")
"When true we will attempts to pre-expand node stacks and cache expanded stacks.")
C10_DEFINE_bool(
torch_jit_expanded_stacks_mangled,

View File

@ -18,7 +18,7 @@ struct TORCH_API JITException : public std::runtime_error {
return python_class_name_;
}
// the original msg if this is from a python exception. The interpretor has
// the original msg if this is from a python exception. The interpreter has
// changed the original message by adding "The following operation failed in
// the TorchScript interpreter." in front of it in the handleError function.
std::optional<std::string> getOriginalMsg() const {

View File

@ -115,8 +115,8 @@ bool isSortableListOfObjectsOrTuples(
}
auto type = ivalues.get(0).type();
// We assume lists have homogenous types, use first element to determine
// best sorting methods. If in the future we need to support heterogenous
// We assume lists have homogeneous types, use first element to determine
// best sorting methods. If in the future we need to support heterogeneous
// types inside list, then sorting needs to have runtime sortable checks.
const size_t n = ivalues.size();
for (const auto i : c10::irange(n)) {
@ -1141,7 +1141,7 @@ static const std::vector<OperatorGeneratorArgs> opGenArgs{
//
// create a clone of these declarations with a _hacked_twin overload name
// and nullability scrubbed from TensorList arg types
// TOOD find out why this exists and how to do it without the hack
// TODO find out why this exists and how to do it without the hack
//
OperatorGeneratorArgs(
TORCH_SELECTIVE_SCHEMA(
@ -2839,7 +2839,7 @@ void hashValue(Stack& stack) {
}
static const std::vector<OperatorGeneratorArgs> opGenArgs2{
// registered as Any[] so that heterogenous tuples can be called with len()
// registered as Any[] so that heterogeneous tuples can be called with len()
OperatorGeneratorArgs(
TORCH_SELECTIVE_SCHEMA("aten::len.any(Any[] a) -> int"),
listLen,

View File

@ -3204,7 +3204,7 @@ def _batch_norm_with_update(input: List[int],
)=====")
+ std::string(R"=====(def broadcast_inplace(a: List[int],
b: List[int]) -> List[int]:
_0 = "The dims of tensor b ({}) must be less than or equal tothe dims of tensor a ({}) "
_0 = "The dims of tensor b ({}) must be less than or equal to the dims of tensor a ({}) "
_1 = "The size of tensor a {} must match the size of tensor b ({}) at non-singleton dimension {}"
dimsA = torch.len(a)
dimsB = torch.len(b)
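The error message corrected here guards in-place broadcasting, where the right-hand operand cannot have more dimensions than the in-place target and must broadcast into it. A small illustration (shapes chosen arbitrarily):

```python
import torch

a = torch.zeros(2, 3)
a.add_(torch.ones(3))  # fine: b's single dim broadcasts into a's (2, 3)
try:
    a.add_(torch.ones(4, 2, 3))  # b has more dims than a, so it is rejected
except RuntimeError as e:
    print(e)
```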

View File

@ -71,7 +71,7 @@ Runtime instances in your code.
Static runtime's memory planner does two things:
1) Coalesces internal allocations for tensor storage
2) Does static analysis to figure out how to efficiently re-use memory.
2) Does static analysis to figure out how to efficiently reuse memory.
### Standard Resizing
Static runtime will record the space required for each intermediate managed tensor it sees

View File

@ -70,7 +70,7 @@ TORCH_API inline bool borrowsOutputs(c10::Symbol kind) {
// The output aliases that end up here are as a result of aliasDb failing to
// recognize them as outputs due to collection object (e.g., Tuple) aliasing
// inputs.
// Values that dont't show up in output_aliases or external_aliases are created
// Values that don't show up in output_aliases or external_aliases are created
// and consumed within the graph.
class ValueGroup {
public:
@ -111,7 +111,7 @@ class TORCH_API ManagedTensorRanges {
// If true, then this node is the last use of at least one
// managed tensor. availableTensorValuesAfterNode(node) will return a vector
// of the managed tensors that are available for re-use
// of the managed tensors that are available for reuse
// in the nodes following this one.
bool nodeFreesManagedTensors(Node* node) const;
const std::vector<const Value*>& availableTensorValuesAfterNode(
@ -141,7 +141,7 @@ class TORCH_API ManagedTensorRanges {
void extendInputLifetime(Node* node, size_t new_end);
// Maps Node* to the set of managed tensors that are now available
// for re-use after this node.
// for reuse after this node.
c10::FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_{};
// Maps each Value* to its lifetime (start node index, end node index)
c10::FastMap<const Value*, Lifetime> value_lifetimes_{};

View File

@ -76,7 +76,7 @@ std::vector<StorageGroup> assignStorageToManagedTensors(
// This set maps each Value* to its assigned storage group.
c10::FastMap<const Value*, size_t> storage_group_mapping;
// On each iteration, this vector stores the set of storage groups that
// are available for re-use.
// are available for reuse.
std::vector<size_t> free_storage_groups;
auto makeNewStorageGroup = [&](const Value* value) {

View File

@ -529,7 +529,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::to, aten_to, [](Node* n) -> SROperator {
const auto in1_i = p_node->Input(1).toOptional<at::ScalarType>();
const auto in2_i = p_node->Input(2).toBool();
const auto in3_i = p_node->Input(3).toBool();
// To mimick the behavior of the JIT interpreter, if both dtype
// To mimic the behavior of the JIT interpreter, if both dtype
// and copy are not set, we return self. Otherwise, we assume
// that dtype is set.
if (!in1_i && !in3_i) {

View File

@ -214,7 +214,7 @@ struct TORCH_API BytecodeEmitMode {
// true: instruction of default argument values (like LOADC) is emitted.
// false: instruction of default argument values are not emitted. Instead
// they are fetched from operator schema.
// default_args_before_out_args (to forward compatibile support
// default_args_before_out_args (to forward compatible support
// operators allowing out arguments and default arguments):
// true: the number of specified arguments will deserialized to (#all_args -
// #default_args). false: the number of specified arguments will deserialized to

View File

@ -131,7 +131,7 @@ std::string get_named_tuple_str_or_default(
// str() return "Tensor" and repr_str() return "Tensor (inferred)". If
// it's not inferred type, str() return "Tensor[]" and repr_str()
// return "Tensor". In cpp, repr_str() will always return "Tensor"
// regardless inferred type. When exporing custom type in bytecode,
// regardless inferred type. When exporting custom type in bytecode,
// "Tensor" is the preferred way to deserialize Tensor type
std::string named_tuple_type_str = it->is_inferred_type()
? named_tuple_type->str()
@ -554,7 +554,7 @@ void ScriptModuleSerializer::writeArchive(
}
WriteableTensorData writable_td = getWriteableTensorData(td);
if (use_storage_context && serialized_tensors.count(tensor_name)) {
// storage has been serialzed already, skip
// storage has been serialized already, skip
continue;
}
writer_.writeRecord(
@ -698,10 +698,10 @@ void ScriptModuleSerializer::writeByteCode(
// debug handles.
// The reason we save debug handles conditionally is so that
// we dont end up with a model that has debug handles but has not
// debug map to correlate debug handels with.
// debug map to correlate debug handles with.
// Once we have a model with both handles and debug map, we can
// strip off debug map and have a lean model served to production.
// If exception ocurrs we have a model with debug map that can be
// If exception occurs we have a model with debug map that can be
// used to symbolicate debug handles
writeArchive(
debug_info_telements,

View File

@ -212,7 +212,7 @@ struct PythonPrintImpl {
// and would appear in the same order when the expression tree is
// reparsed.
// The last case can be checked
// because when we emit a expresion tree in the parser,
// because when we emit a expression tree in the parser,
// we do a left-to-right postorder traversal of the expression tree (emit
// children, then emit op). The reverse of this is a right-to-left preorder
// traversal of the tree. By doing a right-to-left preorder traversal of the
@ -222,12 +222,12 @@ struct PythonPrintImpl {
// expression.
// The inductive step is that the right-most input should be produced by the
// node immediatly before the current node if it is in tree order.
// node immediately before the current node if it is in tree order.
bool canInline(Value* v) {
Node* n = v->node();
// there must be only 1 values, otherwise we need an assignment to handle
// the multiple outout values
// the multiple output values
if (n->outputs().size() != 1)
return false;
// if it is used more than once, then we need a variable
@ -651,7 +651,7 @@ struct PythonPrintImpl {
// [reordering of inlines]
// We inline anything that is semantically legal to inline, but sometimes
// we find that these lines get too long. In that case we break the lines
/// and it is important that we un-inline all the inputs preceeding the long
/// and it is important that we un-inline all the inputs preceding the long
/// input:
// r = foo(x.add_(b), some_long + expression)
// wrong!
@ -1410,7 +1410,7 @@ struct PythonPrintImpl {
enforce_importable_(enforce_importable) {}
void printClass(const ClassTypePtr& classType) {
// If any of the methods are not Graph funtions, this indicates that
// If any of the methods are not Graph functions, this indicates that
// this class is a custom-bound C++ class. Skip serialization
// of this class, we will depend on the ClassType being defined
// in the target process.

View File

@ -44,7 +44,7 @@ void restoreAccurateTypeTags(const IValue& root, const TypePtr& type_tag) {
to_process.pop_back();
// ensure we only scan each pointer value once, otherwise this
// can become exponential (and if we allow recursive data in the future,
// it would not terminiate).
// it would not terminate).
if (w.value.isPtrType()) {
const void* key = w.value.internalToPointer();
auto it = scanned.find(key);
@ -490,7 +490,7 @@ PickleOpCode Unpickler::readInstruction() {
stack_.size(),
" and start index is ",
start,
", but stack_ is iterated by two elemenst at a time");
", but stack_ is iterated by two elements at a time");
for (size_t i = start; i < stack_.size(); i += 2) {
dict.insert_or_assign(stack_[i], stack_[i + 1]);
}
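The loop above, and the error message being corrected, reflect an interleaved key/value layout on the unpickler stack (as in pickle's SETITEMS handling); in Python terms:

```python
# Interleaved [k0, v0, k1, v1, ...] slice consumed two elements at a time,
# mirroring the C++ loop above.
stack = ["a", 1, "b", 2]
start = 0
d = {stack[i]: stack[i + 1] for i in range(start, len(stack), 2)}
assert d == {"a": 1, "b": 2}
```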

View File

@ -1437,7 +1437,7 @@ void nnc_aten_embedding(
r = at::embedding(weight, indices);
} catch (...) {
}
// TODO: have to copy output because at::embedding doesnt have an out
// TODO: have to copy output because at::embedding doesn't have an out
// variant and NNC's external calls don't support allocations
memcpy(buf_data[0], r.const_data_ptr(), r.element_size() * r.numel());
}

View File

@ -125,7 +125,7 @@ Dtype Intrinsics::IntrinsicsDtype(
IntrinsicsOp op_type,
const std::vector<ExprPtr>& params) {
// TODO: check the op_type and make a real decision
// Doesnt this fail with kRand?
// Doesn't this fail with kRand?
if (params.empty()) {
throw malformed_input("invalid params in Intrinsics");
} else if (params.size() == 1) {

View File

@ -930,7 +930,7 @@ ExprPtr PolynomialTransformer::mutate(const MulPtr& v) {
variable = lhs_new;
}
// Handle special case mul by 1 since thats safe for floating point, even if
// Handle special case mul by 1 since that's safe for floating point, even if
// it's Nan/Inf.
if (scalar && immediateEquals(scalar, 1)) {
auto c = alloc<Cast>(v->dtype(), variable);
@ -1105,8 +1105,8 @@ ExprPtr PolynomialTransformer::mutate(const DivPtr& v) {
return lhs_new;
}
// If numberator and denominator are equal the result is 1.
// Unless the demoninator could be zero.
// If numerator and denominator are equal the result is 1.
// Unless the denominator could be zero.
// if (hasher_.hash(lhs_new) == hasher_.hash(rhs_new)) {
// return getImmediateByType(v->dtype(), 1);
// }
@ -1745,7 +1745,7 @@ ExprPtr TermExpander::mutate(const TermPtr& v) {
std::vector<ExprPtr> vars;
std::vector<ExprPtr> multilaneVars;
// Assume we can reorder here because we wont merge floating terms.
// Assume we can reorder here because we won't merge floating terms.
ExprPtr lastNode{nullptr};
for (const auto& var : v->variables()) {
ExprPtr node = var->accept_mutator(this);
@ -1830,7 +1830,7 @@ static ExprPtr polyGCD(const PolynomialPtr& poly) {
ExprPtr scalar = poly->scalar();
const std::vector<TermPtr>& variables = poly->variables();
// We ony want to factorize if we're saving complete operations, i.e. no
// We only want to factorize if we're saving complete operations, i.e. no
// value in factorizing 6x + 4y into 2 * (3x + 2y) since we don't save work.
int opsSaved = 1; // default to saving the scalar.
long GCD = std::abs(immediateAs<long>(scalar));
@ -2088,7 +2088,7 @@ static ExprPtr simplifyRoundModPattern(const PolynomialPtr& poly) {
// TODO: for now don't attempt partial factorization of this
// optimization. E.g. it's possible to do: 2 * (x/y) * y + (x%y) => x +
// (x/y) * y but unsure thats actually much better, particularly with
// (x/y) * y but unsure that's actually much better, particularly with
// CSE.
if (!immediateEquals(
evaluateOp(alloc<Sub>(r->scalar(), m->scalar())), 0)) {
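The rewrite sketched in the comment above leans on the integer round-trip identity (x / y) * y + (x % y) == x; a quick sanity check (Python's floor-division/modulo pair satisfies the same relation as C-style truncating division):

```python
# Sanity-check the identity behind simplifyRoundModPattern.
for x in range(-8, 9):
    for y in (1, 2, 3, 5, 7):
        assert (x // y) * y + x % y == x
```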

View File

@ -1263,11 +1263,11 @@ Tensor TensorExprKernel::convertSymbolicOutputToCorrectStrides(
const std::vector<size_t>& sorted_stride_indices_descending,
const std::vector<ExprPtr>& strides,
BufPtr& buf) {
// We need to convert the output tensor so that its values are layed
// We need to convert the output tensor so that its values are laid
// so that when viewed from the output strides the values are correct.
// A contiguous Tensor of size(2, 3) with values 0-5 is layed out as:
// A contiguous Tensor of size(2, 3) with values 0-5 is laid out as:
// [0] [1] [2] [3] [4] [5]
// The same valued tensor with strides (1, 2) would be layed out like
// The same valued tensor with strides (1, 2) would be laid out like
// [0] [3] [1] [4] [2] [5]
// When we are doing the re-ordering of values into the output tensor,
// we are iterating per-element of the input, and we are fixed
@ -1378,7 +1378,7 @@ Tensor TensorExprKernel::convertStaticShapeOutputToCorrectStrides(
tt->strides().concrete_sizes(),
buildErrorMessage("Output strides are unknown."));
const std::vector<int64_t> strides = *tt->strides().concrete_sizes();
// All Tensors in NNC are layed out in default, contiguous layout.
// All Tensors in NNC are laid out in default, contiguous layout.
// If the output is also default contiguous we don't need to do anything
if (strides == default_strides) {
return Tensor(buf, nullptr);
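The layout illustration a few lines up (values 0 through 5 with strides (1, 2) stored as [0] [3] [1] [4] [2] [5]) can be reproduced directly; a small sketch:

```python
import torch

a = torch.arange(6, dtype=torch.float32).reshape(2, 3)      # contiguous storage: 0 1 2 3 4 5
b = torch.empty_strided((2, 3), (1, 2), dtype=torch.float32)
b.copy_(a)                                                   # same logical values as `a`
assert torch.equal(a, b)
# Reading b's storage in physical order shows the strided layout from the comment:
print(torch.as_strided(b, (6,), (1,)))                       # tensor([0., 3., 1., 4., 2., 5.])
```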

View File

@ -780,7 +780,7 @@ void LLVMCodeGenImpl::emitKernel(
GRAPH_DEBUG("\nLLVM generated assembly code\n\n", asmCode_, "\n");
}
// TODO: The binary ops are copypasta.
// TODO: The binary ops are copypaste.
void LLVMCodeGenImpl::visit(const AddPtr& v) {
v->lhs()->accept(this);
@ -878,7 +878,7 @@ void LLVMCodeGenImpl::visit(const OrPtr& v) {
bool rfp = rhs->getType()->isFPOrFPVectorTy();
if (!lfp && !rfp) {
value_ = irb_.CreateOr(lhs, rhs);
value_ = irb_.CreateOr(lhs, rhs); // codespell:ignore
} else {
throw malformed_input("llvm_codegen: bad type in Or", v);
}
@ -1225,7 +1225,7 @@ void LLVMCodeGenImpl::visit(const CastPtr& v) {
}
value_ = irb_.CreateFPCast(value_, dstType);
} else if (dstType->isIntOrIntVectorTy()) {
// Strictly casting from Float -> i8 doesnt give correct results
// Strictly casting from Float -> i8 doesn't give correct results
// set one bit true if the input float is not 0
if (v->dtype().scalar_type() == ScalarType::Bool) {
llvm::Value* zero =

View File

@ -987,7 +987,7 @@ void LoopNest::inlineIntermediateBufs(bool allow_duplicated_work) {
}
}
// all bufs will have at least one store (if they have > 1 they cant be
// all bufs will have at least one store (if they have > 1 they can't be
// inlined anyway)
size_t reads = uses.size() - 1;
// if only one read, we can inline it without duplicating work
@ -1843,11 +1843,11 @@ bool LoopNest::hasLoopCarriedDependence(const ForPtr& loop) {
auto bLoads = NodeFinder<Load>::find(*it2);
// ReadAfterWrite
for (auto& aStore : aStores) {
for (auto& bLoad : bLoads) {
for (auto& bLoad : bLoads) { // codespell:ignore
if (aStore->buf() == bLoad->buf()) {
if (!areIndicesLoopIndependent(
aStore->indices(), bLoad->indices(), outer_loop_vars)) {
if (isOverlapping(analyzer, aStore, bLoad)) {
if (isOverlapping(analyzer, aStore, bLoad)) { // codespell:ignore
return true;
}
}

View File

@ -240,7 +240,7 @@ class TORCH_API MemDependencyChecker : public IRVisitor {
std::unordered_set<std::shared_ptr<AccessInfo>> accessesWithin(
const StmtPtr& A) const;
// TODO: this will return only the AccessInfo for A. It's included for
// completeness but be aware it wont return accesses used in the computation
// completeness but be aware it won't return accesses used in the computation
// of A.
std::unordered_set<std::shared_ptr<AccessInfo>> accessesWithin(
const ExprPtr& A) const;

View File

@ -225,7 +225,7 @@ void RegisterizerAnalysis::visit(const ForPtr& v) {
// possible that an access at a higher scope could "unhide" the
// conditional access, in which case we need to hoist. If there is no
// access to this element at a higher scope then we cannot safely hoist.
// We cannot know at this level whether that will or wont occur.
// We cannot know at this level whether that will or won't occur.
//
// The solution we take here is to split the space-time continuum, and
// keep both versions of the access handy. If the hoisted access is not
@ -542,7 +542,7 @@ void RegisterizerAnalysis::mergeCurrentScopeIntoParent() {
closeAccessIntoScope(pCandidate, parent);
parentAccesses.erase(parentIt);
// the childs access inserted into the parent scope.
// the children access inserted into the parent scope.
closeAccessIntoScope(candidate, parent);
continue;
}
@ -567,7 +567,7 @@ void RegisterizerAnalysis::mergeCurrentScopeIntoParent() {
++it;
}
// Insert the childs closed access into the parent scope.
// Insert the children closed access into the parent scope.
closeAccessIntoScope(candidate, parent);
}

View File

@ -186,7 +186,7 @@ class AccessInfo {
bool firstUsageOverlapped_{false};
// The cost in real ops that this access represents, to enable
// filtering accesses that wont save any loads or stores.
// filtering accesses that won't save any loads or stores.
ExprPtr store_cost_;
ExprPtr load_cost_;