[BE][8/16] fix typos in torch/ (torch/csrc/jit/) (#156318)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156318
Approved by: https://github.com/albanD

Committed by: PyTorch MergeBot
Parent: c0e155a8d2
Commit: 541584d22e
@@ -1177,7 +1177,6 @@ exclude_patterns = [
'torch/distributed/tensor/**',
'torch/[j-o]*/**',
'torch/utils/**',
'torch/csrc/jit/**',
'torch/csrc/jit/[a-o]*/**',
]
init_command = [

@@ -16,7 +16,7 @@ nin
nout
NowNs
optins
OT
ot
overrideable
padD
ptd

@@ -958,7 +958,7 @@ torch._C._jit_set_fusion_strategy([
])
```

This will make two attempts to generate static-shape graphs, and after that fall back to generating dynamic-shape graphs. If for some reason compilation keeps occuring (even with dynamic-shape graphs - e.g. this could happen if ranks or dtypes vary), after 20 compilation attempts the graph executor will fall back to running the graph without any attempts to compile it.
This will make two attempts to generate static-shape graphs, and after that fall back to generating dynamic-shape graphs. If for some reason compilation keeps occurring (even with dynamic-shape graphs - e.g. this could happen if ranks or dtypes vary), after 20 compilation attempts the graph executor will fall back to running the graph without any attempts to compile it.

### Pre-derivative Optimization ###
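For reference, the fusion-strategy documentation touched in the hunk above (the text just before the "Pre-derivative Optimization" heading) only shows the tail of the `torch._C._jit_set_fusion_strategy` call. A minimal sketch of what that call typically looks like is below; the depth values come from the surrounding prose (two static-shape attempts, up to 20 compilations overall) and should be read as an assumption, not as part of this commit:

```python
import torch

# Illustrative only (not part of this diff): ask the JIT graph executor to
# specialize twice on static shapes, then fall back to dynamic-shape graphs,
# and stop trying to compile after 20 attempts in total.
torch._C._jit_set_fusion_strategy([("STATIC", 2), ("DYNAMIC", 20)])
```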
@@ -26,5 +26,5 @@ A brief summary of the source tree:
**Refer** to each folder for more in-depth documentation.

Other relevant parts of the codebase not contained here:
- [aten/src/ATen/core](../../../aten/src/ATen/core): contains JIT code re-used by other elements of the
- [aten/src/ATen/core](../../../aten/src/ATen/core): contains JIT code reused by other elements of the
runtime system (eager, mobile, etc.)

@@ -319,7 +319,7 @@ static void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
}

static bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
// Cutoff chosed by benchmarking on a TITAN V
// Cutoff chose by benchmarking on a TITAN V
return other_side_input.numel() <= 1024 * 2048;
}

@@ -96,7 +96,7 @@ static bool isBefore(Node* n1, Node* n2) {
}
}

// Now they are the same numer of blocks from the graph block,
// Now they are the same number of blocks from the graph block,
// recurse upwards, checking if they are on the same block
while (true) {
if (n1->owningBlock() == n2->owningBlock()) {

@@ -98,7 +98,7 @@ void InplaceMKLDNNSubgraph(const std::shared_ptr<Graph>& graph) {
// This function first calculates aliasing sets,
// then calculates the last node each aliasing set is alive for.
// Then we go through each node, if it's a node which has an equivalent
// inplace node and the aliasing set for its input is dead afer this node, we
// inplace node and the aliasing set for its input is dead after this node, we
// inplace it. Then we merge the aliasing sets for the input and output of the
// node and extend the liveness of the set. To inplace a node you need to
// prove device and dtype of the input and output are the same, which we've

@@ -812,7 +812,7 @@ void ComputeSubgraphInMKLDNN(Node* subgraph_node) {

if (body_node->kind() == aten::conv2d ||
body_node->kind() == aten::conv3d) {
// this node doesnt handle string padding yet...
// this node doesn't handle string padding yet...
if (!body_node->namedInput("padding")->type()->cast<StringType>()) {
body_node->replaceWithNewSymbol(Symbol::prim("mkldnn_convolution"));
body_node->destroy();

@@ -167,7 +167,7 @@ std::shared_ptr<Graph> ToONNX(
ConstantValueMap::ClearMaps();
auto new_graph = std::make_shared<Graph>(graph->current_scope());
py::dict env;
// Kept identical to values in env. Used for constant-time existance check.
// Kept identical to values in env. Used for constant-time existence check.
py::set values_in_env;
try {
BlockToONNX(

@@ -17,7 +17,7 @@ namespace torch::jit {
// information. Shape and type information is only available after
// _jit_pass_onnx, which converts aten nodes to onnx nodes. So there is a
// interdependent issue. _jit_pass_onnx depends on preprocess passes to convert
// aten nodes into convertable condition, and preprocess passes depend on
// aten nodes into convertible condition, and preprocess passes depend on
// _jit_pass_onnx to convert upstream nodes and apply onnx shape inference.
// Separating the pass into two parts breaks the interdependency.
//
@@ -116,7 +116,7 @@ static std::vector<std::string> _single_input_general_shape_aten_funcs = {
"__getitem__",
};

// Theses are prim::CallFunctions for ops that doesn't require observation and
// These are prim::CallFunctions for ops that doesn't require observation and
// have a single input Tensor
// Also these ops do computation on the value of Tensor
// TODO: [Need verify] looks like we can quantize simple functionals that just

@@ -136,7 +136,7 @@ static std::vector<std::string> _single_input_general_value_call_funcs = {
"leaky_relu",
};

// Theses are aten functions for ops that doesn't require observation and
// These are aten functions for ops that doesn't require observation and
// have a single input Tensor
// Also these ops do computation on the value of Tensor
// e.g. `aten::avg_pool2d(%input_tensor, ...)`

@@ -1702,7 +1702,7 @@ Module InsertObserversForOnDevicePTQ(
// you will have multiple getattrs for the same attribute and thus potentially
// multiple observers observing the same value. This will also lead to
// increased size of the packed param struct. I dont expect this to be a
// common pattern but something to be aware fo Note that current quant
// common pattern but something to be aware of Note that current quant
// workflow does not prevent this anyway since during inset quant dequant
// things are inlined anyway
helper.fillBoundaryValueMap(cloned_module, observer_method_name);

@@ -1622,7 +1622,7 @@ void InsertQuantDeQuantHelper::insertCalculateQParamsAndQuantizationOps(
void InsertQuantDeQuantHelper::runForOnDevicePTQ(
Module& module,
const std::string& method_name) {
// In all likelihood this really wont do anything because we expect that
// In all likelihood this really won't do anything because we expect that
// the input method for quantization's prepare step will be inlined. Thus
// only call methods we will see will belong to observer's forward calls.
for (auto& invoked_methods : getInvokedMethods(module, method_name)) {

@@ -1834,8 +1834,8 @@ Module InsertQuantDeQuantOnDevicePTQ(
// ReplicateChooseQParamsQuantDequant: This is propagating dynamic quant's
// quant dequant RemoveRedundantQuantizationOps: THis is removing activation
// observers for dynamic quant when the op related to it is not dynamically
// quantizable. Doesnt really make sense. In our case we wont have those
// anyway since for dynamic quant activations wont be observed We can still
// quantizable. Doesn't really make sense. In our case we won't have those
// anyway since for dynamic quant activations won't be observed We can still
// use this function because the above two methods should really be a noop
h.propagateQuantizationOps(module);
return module;
@@ -206,7 +206,7 @@ QuantFusionInfo getFixedQParamOpFusionInfo(
%r = )";
op_pattern += op_name + "(" + "%a_dequant" + extra_op_arg_list + ")";
// IR pattern common to all ops with fixed quantization parameters for
// asymetric quantization
// asymmetric quantization
std::string asym_fixed_qparam_op_suffix = R"(
%r_scale : float = prim::Constant[value=0.00390625]()
%r_zero_point : int = prim::Constant[value=0]()

@@ -8,7 +8,7 @@ namespace torch::jit {
struct TORCH_API CanonicalizedSymbolicShape {
// TODO: Consider in the future if it is reasonable to
// merge code with SymbolicShape or VaryingShape while keeping
// the two not implicitly convertable (and cause bugs).
// the two not implicitly convertible (and cause bugs).
CanonicalizedSymbolicShape(
const c10::SymbolicShape& orig_shape,
std::unordered_map<int64_t, int64_t>& ss_map) {

@@ -396,7 +396,7 @@ void insertTypeGuard(

namespace {
bool has_unsupported_pin_memory(const Node* node) {
// cant support non-constant pin_memory or pin_memory = True
// can't support non-constant pin_memory or pin_memory = True
if (auto maybe_index = node->schema().argumentIndexWithName("pin_memory")) {
int index = *maybe_index;
auto inp = node->input(index);

@@ -66,7 +66,7 @@ TORCH_API bool isSupported(Node* node);
/// work with dynamic shapes unless explicitly register the shape function via
/// `torch::jit::RegisterShapeComputeGraphForSchema` for the custom operator.
///
/// @return Reference of the custome operator set
/// @return Reference of the custom operator set
///
TORCH_API OperatorSet& getCustomOperatorSet();

@@ -62,7 +62,7 @@ struct ValueMapper {
auto new_outputs = merged_node->outputs();
for (Value* v : new_outputs) {
auto maybe_last_use = firstOrLastUse(v, /*find_first*/ false);
// if it doesnt have a use it shouldnt have been added as output
// if it doesn't have a use it shouldn't have been added as output
TORCH_INTERNAL_ASSERT(maybe_last_use);
const Use last_use = *maybe_last_use;
@@ -2316,7 +2316,7 @@ void initJITBindings(PyObject* module) {
// Throw errors when calling wait() on the returned Future if
// any of the original futures would throw.
// NB: PythonFutureWrapper takes an unwrap_func which serves as a
// callback to evalute the value in the Future. RPC uses this
// callback to evaluate the value in the Future. RPC uses this
// unwrap_func to check whether the returned py::object is a
// RemoteException object, and re-throw the exception if it is.
// By extracting the c10::ivalue::Future from PythonFutureWrapper

@@ -809,7 +809,7 @@ std::pair<std::shared_ptr<Operator>, Stack> getOpWithStack(
}

// This function is used to check if the schema is valid for the given args and
// kwargs. It checks script object by checking wether the FakeScriptObject is
// kwargs. It checks script object by checking whether the FakeScriptObject is
// an instance of the corresponding fake class for the actual class used in
// schema.
bool checkSchemaAllowFakeScriptObject(

@@ -649,7 +649,7 @@ inline InferredType tryToInferContainerType(
"."));
} else {
// TODO: this message is not correct anymore, since this InferredType is
// used from a bunch of circumstances unrelated to tracing. We can re-use
// used from a bunch of circumstances unrelated to tracing. We can reuse
// this instead of the attribute_failure stuff in concreteType
return InferredType(c10::str(
"Only tensors and (possibly nested) tuples of tensors, lists, or dicts ",

@@ -99,7 +99,7 @@ struct C10_EXPORT ConcretePyObjectHolder final : PyObjectHolder {
py_obj_.ptr() = nullptr;
}

// explicit construction to avoid errornous implicit conversion and
// explicit construction to avoid erroneous implicit conversion and
// copy-initialization
explicit ConcretePyObjectHolder(py::object py_obj)
: py_obj_(std::move(py_obj)) {}

@@ -1223,7 +1223,7 @@ std::shared_ptr<SugaredValue> toSugaredValue(
obj.ptr() == py::module::import("torch.jit").attr("isinstance").ptr()) {
return SpecialFormValue::create(prim::isinstance);
#ifdef USE_RPC
// RPC module is only avaialble when build flag "USE_DISTRIBUTED" is on.
// RPC module is only available when build flag "USE_DISTRIBUTED" is on.
} else if (
isRpcAvailable &&
obj.ptr() ==
@@ -1236,7 +1236,7 @@ std::shared_ptr<SugaredValue> toSugaredValue(
return SpecialFormValue::create(prim::rpc_sync);
} else if (
isRpcAvailable &&
// RPC module is only avaialble when build flag "USE_DISTRIBUTED" is on.
// RPC module is only available when build flag "USE_DISTRIBUTED" is on.
obj.ptr() ==
py::module::import("torch.distributed.rpc").attr("remote").ptr()) {
return SpecialFormValue::create(prim::rpc_remote);
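The two hunks above touch comments in toSugaredValue, the code that maps Python-level calls such as `torch.jit.isinstance` and the `torch.distributed.rpc` entry points to TorchScript special forms (`prim::isinstance`, `prim::rpc_sync`, `prim::rpc_remote`). As a hedged sketch of what the first of those special forms enables for users (this example is illustrative only and is not part of the diff):

```python
import torch
from typing import Any, List

@torch.jit.script
def count_ints(x: Any) -> int:
    # torch.jit.isinstance can refine container element types (e.g. List[int]),
    # which the builtin isinstance cannot express under TorchScript.
    if torch.jit.isinstance(x, List[int]):
        return len(x)
    return 0

print(count_ints([1, 2, 3]))  # expected output: 3
```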
@@ -68,7 +68,7 @@ struct VISIBILITY_HIDDEN PythonValue : public SugaredValue {
ErrorReport(loc)
<< kind() << " cannot be used as a value. "
<< "Perhaps it is a closed over global variable? If so, please "
<< "consider passing it in as an argument or use a local varible "
<< "consider passing it in as an argument or use a local variable "
<< "instead.");
}

@@ -89,7 +89,7 @@ std::pair<std::shared_ptr<Graph>, Stack> createGraphByTracingWithDict(
};

// The argument_names parameter is parsed in python and its order
// is the same as the arguments' decalaration order in forward() method.
// is the same as the arguments' declaration order in forward() method.
// These name shall be added to the graph as debug name and the order
// should align with the traceable stack we generated by the python dict.
std::vector<std::string> compact_argument_names;

@@ -55,7 +55,7 @@ C10_DEFINE_bool(
C10_DEFINE_bool(
torch_jit_enable_expanded_stacks,
false,
"When true we will attemps to pre-expand node stacks and cache expanded stacks.")
"When true we will attempts to pre-expand node stacks and cache expanded stacks.")

C10_DEFINE_bool(
torch_jit_expanded_stacks_mangled,

@@ -18,7 +18,7 @@ struct TORCH_API JITException : public std::runtime_error {
return python_class_name_;
}

// the original msg if this is from a python exception. The interpretor has
// the original msg if this is from a python exception. The interpreter has
// changed the original message by adding "The following operation failed in
// the TorchScript interpreter." in front of it in the handleError function.
std::optional<std::string> getOriginalMsg() const {

@@ -115,8 +115,8 @@ bool isSortableListOfObjectsOrTuples(
}

auto type = ivalues.get(0).type();
// We assume lists have homogenous types, use first element to determine
// best sorting methods. If in the future we need to support heterogenous
// We assume lists have homogeneous types, use first element to determine
// best sorting methods. If in the future we need to support heterogeneous
// types inside list, then sorting needs to have runtime sortable checks.
const size_t n = ivalues.size();
for (const auto i : c10::irange(n)) {

@@ -1141,7 +1141,7 @@ static const std::vector<OperatorGeneratorArgs> opGenArgs{
//
// create a clone of these declarations with a _hacked_twin overload name
// and nullability scrubbed from TensorList arg types
// TOOD find out why this exists and how to do it without the hack
// TODO find out why this exists and how to do it without the hack
//
OperatorGeneratorArgs(
TORCH_SELECTIVE_SCHEMA(

@@ -2839,7 +2839,7 @@ void hashValue(Stack& stack) {
}

static const std::vector<OperatorGeneratorArgs> opGenArgs2{
// registered as Any[] so that heterogenous tuples can be called with len()
// registered as Any[] so that heterogeneous tuples can be called with len()
OperatorGeneratorArgs(
TORCH_SELECTIVE_SCHEMA("aten::len.any(Any[] a) -> int"),
listLen,
@@ -3204,7 +3204,7 @@ def _batch_norm_with_update(input: List[int],
)=====")
+ std::string(R"=====(def broadcast_inplace(a: List[int],
b: List[int]) -> List[int]:
_0 = "The dims of tensor b ({}) must be less than or equal tothe dims of tensor a ({}) "
_0 = "The dims of tensor b ({}) must be less than or equal to the dims of tensor a ({}) "
_1 = "The size of tensor a {} must match the size of tensor b ({}) at non-singleton dimension {}"
dimsA = torch.len(a)
dimsB = torch.len(b)

@@ -71,7 +71,7 @@ Runtime instances in your code.
Static runtime's memory planner does two things:

1) Coalesces internal allocations for tensor storage
2) Does static analysis to figure out how to efficiently re-use memory.
2) Does static analysis to figure out how to efficiently reuse memory.

### Standard Resizing
Static runtime will record the space required for each intermediate managed tensor it sees

@@ -70,7 +70,7 @@ TORCH_API inline bool borrowsOutputs(c10::Symbol kind) {
// The output aliases that end up here are as a result of aliasDb failing to
// recognize them as outputs due to collection object (e.g., Tuple) aliasing
// inputs.
// Values that dont't show up in output_aliases or external_aliases are created
// Values that don't show up in output_aliases or external_aliases are created
// and consumed within the graph.
class ValueGroup {
public:

@@ -111,7 +111,7 @@ class TORCH_API ManagedTensorRanges {

// If true, then this node is the last use of at least one
// managed tensor. availableTensorValuesAfterNode(node) will return a vector
// of the managed tensors that are available for re-use
// of the managed tensors that are available for reuse
// in the nodes following this one.
bool nodeFreesManagedTensors(Node* node) const;
const std::vector<const Value*>& availableTensorValuesAfterNode(

@@ -141,7 +141,7 @@ class TORCH_API ManagedTensorRanges {
void extendInputLifetime(Node* node, size_t new_end);

// Maps Node* to the set of managed tensors that are now available
// for re-use after this node.
// for reuse after this node.
c10::FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_{};
// Maps each Value* to its lifetime (start node index, end node index)
c10::FastMap<const Value*, Lifetime> value_lifetimes_{};

@@ -76,7 +76,7 @@ std::vector<StorageGroup> assignStorageToManagedTensors(
// This set maps each Value* to its assigned storage group.
c10::FastMap<const Value*, size_t> storage_group_mapping;
// On each iteration, this vector stores the set of storage groups that
// are available for re-use.
// are available for reuse.
std::vector<size_t> free_storage_groups;

auto makeNewStorageGroup = [&](const Value* value) {

@@ -529,7 +529,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::to, aten_to, [](Node* n) -> SROperator {
const auto in1_i = p_node->Input(1).toOptional<at::ScalarType>();
const auto in2_i = p_node->Input(2).toBool();
const auto in3_i = p_node->Input(3).toBool();
// To mimick the behavior of the JIT interpreter, if both dtype
// To mimic the behavior of the JIT interpreter, if both dtype
// and copy are not set, we return self. Otherwise, we assume
// that dtype is set.
if (!in1_i && !in3_i) {
@@ -214,7 +214,7 @@ struct TORCH_API BytecodeEmitMode {
// true: instruction of default argument values (like LOADC) is emitted.
// false: instruction of default argument values are not emitted. Instead
// they are fetched from operator schema.
// default_args_before_out_args (to forward compatibile support
// default_args_before_out_args (to forward compatible support
// operators allowing out arguments and default arguments):
// true: the number of specified arguments will deserialized to (#all_args -
// #default_args). false: the number of specified arguments will deserialized to

@@ -131,7 +131,7 @@ std::string get_named_tuple_str_or_default(
// str() return "Tensor" and repr_str() return "Tensor (inferred)". If
// it's not inferred type, str() return "Tensor[]" and repr_str()
// return "Tensor". In cpp, repr_str() will always return "Tensor"
// regardless inferred type. When exporing custom type in bytecode,
// regardless inferred type. When exporting custom type in bytecode,
// "Tensor" is the preferred way to deserialize Tensor type
std::string named_tuple_type_str = it->is_inferred_type()
? named_tuple_type->str()

@@ -554,7 +554,7 @@ void ScriptModuleSerializer::writeArchive(
}
WriteableTensorData writable_td = getWriteableTensorData(td);
if (use_storage_context && serialized_tensors.count(tensor_name)) {
// storage has been serialzed already, skip
// storage has been serialized already, skip
continue;
}
writer_.writeRecord(

@@ -698,10 +698,10 @@ void ScriptModuleSerializer::writeByteCode(
// debug handles.
// The reason we save debug handles conditionally is so that
// we dont end up with a model that has debug handles but has not
// debug map to correlate debug handels with.
// debug map to correlate debug handles with.
// Once we have a model with both handles and debug map, we can
// strip off debug map and have a lean model served to production.
// If exception ocurrs we have a model with debug map that can be
// If exception occurs we have a model with debug map that can be
// used to symbolicate debug handles
writeArchive(
debug_info_telements,

@@ -212,7 +212,7 @@ struct PythonPrintImpl {
// and would appear in the same order when the expression tree is
// reparsed.
// The last case can be checked
// because when we emit a expresion tree in the parser,
// because when we emit a expression tree in the parser,
// we do a left-to-right postorder traversal of the expression tree (emit
// children, then emit op). The reverse of this is a right-to-left preorder
// traversal of the tree. By doing a right-to-left preorder traversal of the

@@ -222,12 +222,12 @@ struct PythonPrintImpl {
// expression.

// The inductive step is that the right-most input should be produced by the
// node immediatly before the current node if it is in tree order.
// node immediately before the current node if it is in tree order.

bool canInline(Value* v) {
Node* n = v->node();
// there must be only 1 values, otherwise we need an assignment to handle
// the multiple outout values
// the multiple output values
if (n->outputs().size() != 1)
return false;
// if it is used more than once, then we need a variable
@@ -651,7 +651,7 @@ struct PythonPrintImpl {
// [reordering of inlines]
// We inline anything that is semantically legal to inline, but sometimes
// we find that these lines get too long. In that case we break the lines
/// and it is important that we un-inline all the inputs preceeding the long
/// and it is important that we un-inline all the inputs preceding the long
/// input:
// r = foo(x.add_(b), some_long + expression)
// wrong!

@@ -1410,7 +1410,7 @@ struct PythonPrintImpl {
enforce_importable_(enforce_importable) {}

void printClass(const ClassTypePtr& classType) {
// If any of the methods are not Graph funtions, this indicates that
// If any of the methods are not Graph functions, this indicates that
// this class is a custom-bound C++ class. Skip serialization
// of this class, we will depend on the ClassType being defined
// in the target process.

@@ -44,7 +44,7 @@ void restoreAccurateTypeTags(const IValue& root, const TypePtr& type_tag) {
to_process.pop_back();
// ensure we only scan each pointer value once, otherwise this
// can become exponential (and if we allow recursive data in the future,
// it would not terminiate).
// it would not terminate).
if (w.value.isPtrType()) {
const void* key = w.value.internalToPointer();
auto it = scanned.find(key);

@@ -490,7 +490,7 @@ PickleOpCode Unpickler::readInstruction() {
stack_.size(),
" and start index is ",
start,
", but stack_ is iterated by two elemenst at a time");
", but stack_ is iterated by two elements at a time");
for (size_t i = start; i < stack_.size(); i += 2) {
dict.insert_or_assign(stack_[i], stack_[i + 1]);
}

@@ -1437,7 +1437,7 @@ void nnc_aten_embedding(
r = at::embedding(weight, indices);
} catch (...) {
}
// TODO: have to copy output because at::embedding doesnt have an out
// TODO: have to copy output because at::embedding doesn't have an out
// variant and NNC's external calls don't support allocations
memcpy(buf_data[0], r.const_data_ptr(), r.element_size() * r.numel());
}

@@ -125,7 +125,7 @@ Dtype Intrinsics::IntrinsicsDtype(
IntrinsicsOp op_type,
const std::vector<ExprPtr>& params) {
// TODO: check the op_type and make a real decision
// Doesnt this fail with kRand?
// Doesn't this fail with kRand?
if (params.empty()) {
throw malformed_input("invalid params in Intrinsics");
} else if (params.size() == 1) {

@@ -930,7 +930,7 @@ ExprPtr PolynomialTransformer::mutate(const MulPtr& v) {
variable = lhs_new;
}

// Handle special case mul by 1 since thats safe for floating point, even if
// Handle special case mul by 1 since that's safe for floating point, even if
// it's Nan/Inf.
if (scalar && immediateEquals(scalar, 1)) {
auto c = alloc<Cast>(v->dtype(), variable);

@@ -1105,8 +1105,8 @@ ExprPtr PolynomialTransformer::mutate(const DivPtr& v) {
return lhs_new;
}

// If numberator and denominator are equal the result is 1.
// Unless the demoninator could be zero.
// If numerator and denominator are equal the result is 1.
// Unless the denominator could be zero.
// if (hasher_.hash(lhs_new) == hasher_.hash(rhs_new)) {
// return getImmediateByType(v->dtype(), 1);
// }
@@ -1745,7 +1745,7 @@ ExprPtr TermExpander::mutate(const TermPtr& v) {
std::vector<ExprPtr> vars;
std::vector<ExprPtr> multilaneVars;

// Assume we can reorder here because we wont merge floating terms.
// Assume we can reorder here because we won't merge floating terms.
ExprPtr lastNode{nullptr};
for (const auto& var : v->variables()) {
ExprPtr node = var->accept_mutator(this);

@@ -1830,7 +1830,7 @@ static ExprPtr polyGCD(const PolynomialPtr& poly) {
ExprPtr scalar = poly->scalar();
const std::vector<TermPtr>& variables = poly->variables();

// We ony want to factorize if we're saving complete operations, i.e. no
// We only want to factorize if we're saving complete operations, i.e. no
// value in factorizing 6x + 4y into 2 * (3x + 2y) since we don't save work.
int opsSaved = 1; // default to saving the scalar.
long GCD = std::abs(immediateAs<long>(scalar));

@@ -2088,7 +2088,7 @@ static ExprPtr simplifyRoundModPattern(const PolynomialPtr& poly) {

// TODO: for now don't attempt partial factorization of this
// optimization. E.g. it's possible to do: 2 * (x/y) * y + (x%y) => x +
// (x/y) * y but unsure thats actually much better, particularly with
// (x/y) * y but unsure that's actually much better, particularly with
// CSE.
if (!immediateEquals(
evaluateOp(alloc<Sub>(r->scalar(), m->scalar())), 0)) {

@@ -1263,11 +1263,11 @@ Tensor TensorExprKernel::convertSymbolicOutputToCorrectStrides(
const std::vector<size_t>& sorted_stride_indices_descending,
const std::vector<ExprPtr>& strides,
BufPtr& buf) {
// We need to convert the output tensor so that its values are layed
// We need to convert the output tensor so that its values are laid
// so that when viewed from the output strides the values are correct.
// A contiguous Tensor of size(2, 3) with values 0-5 is layed out as:
// A contiguous Tensor of size(2, 3) with values 0-5 is laid out as:
// [0] [1] [2] [3] [4] [5]
// The same valued tensor with strides (1, 2) would be layed out like
// The same valued tensor with strides (1, 2) would be laid out like
// [0] [3] [1] [4] [2] [5]
// When we are doing the re-ordering of values into the output tensor,
// we are iterating per-element of the input, and we are fixed

@@ -1378,7 +1378,7 @@ Tensor TensorExprKernel::convertStaticShapeOutputToCorrectStrides(
tt->strides().concrete_sizes(),
buildErrorMessage("Output strides are unknown."));
const std::vector<int64_t> strides = *tt->strides().concrete_sizes();
// All Tensors in NNC are layed out in default, contiguous layout.
// All Tensors in NNC are laid out in default, contiguous layout.
// If the output is also default contiguous we don't need to do anything
if (strides == default_strides) {
return Tensor(buf, nullptr);

@@ -780,7 +780,7 @@ void LLVMCodeGenImpl::emitKernel(
GRAPH_DEBUG("\nLLVM generated assembly code\n\n", asmCode_, "\n");
}

// TODO: The binary ops are copypasta.
// TODO: The binary ops are copypaste.

void LLVMCodeGenImpl::visit(const AddPtr& v) {
v->lhs()->accept(this);

@@ -878,7 +878,7 @@ void LLVMCodeGenImpl::visit(const OrPtr& v) {
bool rfp = rhs->getType()->isFPOrFPVectorTy();

if (!lfp && !rfp) {
value_ = irb_.CreateOr(lhs, rhs);
value_ = irb_.CreateOr(lhs, rhs); // codespell:ignore
} else {
throw malformed_input("llvm_codegen: bad type in Or", v);
}

@@ -1225,7 +1225,7 @@ void LLVMCodeGenImpl::visit(const CastPtr& v) {
}
value_ = irb_.CreateFPCast(value_, dstType);
} else if (dstType->isIntOrIntVectorTy()) {
// Strictly casting from Float -> i8 doesnt give correct results
// Strictly casting from Float -> i8 doesn't give correct results
// set one bit true if the input float is not 0
if (v->dtype().scalar_type() == ScalarType::Bool) {
llvm::Value* zero =
@@ -987,7 +987,7 @@ void LoopNest::inlineIntermediateBufs(bool allow_duplicated_work) {
}
}

// all bufs will have at least one store (if they have > 1 they cant be
// all bufs will have at least one store (if they have > 1 they can't be
// inlined anyway)
size_t reads = uses.size() - 1;
// if only one read, we can inline it without duplicating work

@@ -1843,11 +1843,11 @@ bool LoopNest::hasLoopCarriedDependence(const ForPtr& loop) {
auto bLoads = NodeFinder<Load>::find(*it2);
// ReadAfterWrite
for (auto& aStore : aStores) {
for (auto& bLoad : bLoads) {
for (auto& bLoad : bLoads) { // codespell:ignore
if (aStore->buf() == bLoad->buf()) {
if (!areIndicesLoopIndependent(
aStore->indices(), bLoad->indices(), outer_loop_vars)) {
if (isOverlapping(analyzer, aStore, bLoad)) {
if (isOverlapping(analyzer, aStore, bLoad)) { // codespell:ignore
return true;
}
}

@@ -240,7 +240,7 @@ class TORCH_API MemDependencyChecker : public IRVisitor {
std::unordered_set<std::shared_ptr<AccessInfo>> accessesWithin(
const StmtPtr& A) const;
// TODO: this will return only the AccessInfo for A. It's included for
// completeness but be aware it wont return accesses used in the computation
// completeness but be aware it won't return accesses used in the computation
// of A.
std::unordered_set<std::shared_ptr<AccessInfo>> accessesWithin(
const ExprPtr& A) const;

@@ -225,7 +225,7 @@ void RegisterizerAnalysis::visit(const ForPtr& v) {
// possible that an access at a higher scope could "unhide" the
// conditional access, in which case we need to hoist. If there is no
// access to this element at a higher scope then we cannot safely hoist.
// We cannot know at this level whether that will or wont occur.
// We cannot know at this level whether that will or won't occur.
//
// The solution we take here is to split the space-time continuum, and
// keep both versions of the access handy. If the hoisted access is not

@@ -542,7 +542,7 @@ void RegisterizerAnalysis::mergeCurrentScopeIntoParent() {
closeAccessIntoScope(pCandidate, parent);
parentAccesses.erase(parentIt);

// the childs access inserted into the parent scope.
// the children access inserted into the parent scope.
closeAccessIntoScope(candidate, parent);
continue;
}

@@ -567,7 +567,7 @@ void RegisterizerAnalysis::mergeCurrentScopeIntoParent() {
++it;
}

// Insert the childs closed access into the parent scope.
// Insert the children closed access into the parent scope.
closeAccessIntoScope(candidate, parent);
}

@@ -186,7 +186,7 @@ class AccessInfo {
bool firstUsageOverlapped_{false};

// The cost in real ops that this access represents, to enable
// filtering accesses that wont save any loads or stores.
// filtering accesses that won't save any loads or stores.
ExprPtr store_cost_;
ExprPtr load_cost_;