[BE][9/16] fix typos in torch/ (torch/csrc/) (#156319)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/156319
Approved by: https://github.com/albanD
ghstack dependencies: #156313, #156314, #156315, #156316, #156317
Committed by PyTorch MergeBot · parent ee72815f11 · commit a23ccaa847
@@ -1179,7 +1179,6 @@ exclude_patterns = [
     'torch/utils/**',
     'torch/csrc/jit/**',
     'torch/csrc/jit/[a-o]*/**',
-    'torch/csrc/[a-i]*/**',
     'torch/csrc/distributed/**',
 ]
 init_command = [
@@ -15,7 +15,7 @@ namespace torch::data {
 /// A dataloader for stateless datasets.
 ///
 /// This dataloader follows the traditional PyTorch dataloader design, whereby a
-/// (posssibly) stateful sampler produces *batch requests* for a stateless
+/// (possibly) stateful sampler produces *batch requests* for a stateless
 /// dataset, which acts as a simple batch request to batch mapping. The batch
 /// request will often be an array of indices, and if the dataset is a simple
 /// image dataset, the dataset would produce the images at those indices.
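For context on the design this comment describes, a minimal hedged sketch in the C++ frontend: the (possibly stateful) sampler produces index batches, and the stateless dataset merely maps them to example batches. MyDataset is illustrative; make_data_loader, Stack, and RandomSampler are the stock torch::data components.

#include <torch/torch.h>

// A stateless map-style dataset: get(index) -> the example at that index.
struct MyDataset : torch::data::datasets::Dataset<MyDataset> {
  torch::data::Example<> get(size_t index) override {
    // A real image dataset would load the image at `index` here.
    return {torch::full({3, 8, 8}, static_cast<float>(index)),
            torch::tensor(static_cast<int64_t>(index))};
  }
  torch::optional<size_t> size() const override {
    return 100;
  }
};

void run() {
  // The sampler owns the iteration state; the dataset stays stateless.
  auto dataset = MyDataset().map(torch::data::transforms::Stack<>());
  auto loader =
      torch::data::make_data_loader<torch::data::samplers::RandomSampler>(
          std::move(dataset), torch::data::DataLoaderOptions().batch_size(16));
  for (auto& batch : *loader) {
    // batch.data: [16, 3, 8, 8]; batch.target: [16]
  }
}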
@@ -234,7 +234,7 @@ class BatchDataBuffer {
   // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   ExampleSampler& example_sampler_;

-  // configurable maximun number of elements the queue can hold at one time.
+  // configurable maximum number of elements the queue can hold at one time.
   size_t queue_capacity_;

   // When set to true, it wakes the writer threads from the wait and exit
@@ -286,7 +286,7 @@ struct ChunkDatasetOptions {
   /// The capacity of the queue for batch caching.
   TORCH_ARG(size_t, cache_size) = 2048;

-  // The number of chunks to perfrom cross-chunk shuffling. Default to 1 meaning
+  // The number of chunks to perform cross-chunk shuffling. Default to 1 meaning
   // no cross-chunk shuffling. When it is equal to n (n > 1), n random
   // chunks will be loaded at once and example shuffling will be performed
   // across all those n chunks.
@@ -303,9 +303,10 @@ struct ChunkDatasetOptions {
 ///
 /// Unlike regular dataset, chunk dataset require two samplers to operate and
 /// keeps an internal state. `ChunkSampler` selects, which chunk to load next,
-/// while the `ExampleSampler` determins the order of Examples that are returned
-/// in each `get_batch` call. The hierarchical sampling approach used here is
-/// inspired by this paper http://martin.zinkevich.org/publications/nips2010.pdf
+/// while the `ExampleSampler` determines the order of Examples that are
+/// returned in each `get_batch` call. The hierarchical sampling approach used
+/// here is inspired by this paper
+/// http://martin.zinkevich.org/publications/nips2010.pdf
 template <
     typename ChunkReader,
     typename ChunkSampler = samplers::RandomSampler,
@@ -346,7 +347,7 @@ class ChunkDataset final
   }

   /// Default get_batch method of BatchDataset. This method returns
-  /// Example batches created from the preloaded chunks. The implemenation
+  /// Example batches created from the preloaded chunks. The implementation
   /// is dataset agnostic and does not need overriding in different chunk
   /// datasets.
   BatchType get_batch(size_t batch_size) override {
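To make the two-sampler (hierarchical) design concrete, a hedged sketch of a chunk reader plugged into ChunkDataset, modeled on the pattern used in the C++ API tests; IntReader is illustrative, and the ChunkDataReader virtuals (read_chunk / chunk_count / reset) are assumed from the chunk.h header.

#include <torch/torch.h>

// Each "chunk" is a vector of examples; here, 100 ints per chunk.
struct IntReader : torch::data::datasets::ChunkDataReader<int> {
  ChunkType read_chunk(size_t chunk_index) override {
    return ChunkType(100, static_cast<int>(chunk_index));
  }
  size_t chunk_count() override {
    return 8;
  }
  void reset() override {}
};

void run() {
  // ChunkSampler picks the next chunk; ExampleSampler orders examples within
  // the loaded chunks (both default to RandomSampler, resized internally).
  auto dataset = torch::data::datasets::make_shared_dataset<
      torch::data::datasets::ChunkDataset<IntReader>>(
      IntReader(),
      torch::data::samplers::RandomSampler(0),
      torch::data::samplers::RandomSampler(0),
      torch::data::datasets::ChunkDatasetOptions(
          /*preloader_count=*/2, /*batch_size=*/10));
  auto loader = torch::data::make_data_loader(
      dataset, torch::data::DataLoaderOptions(/*batch_size=*/10).workers(0));
  for (auto& batch : *loader) {
    // batch is a std::vector<int> of up to 10 examples.
  }
}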
@@ -24,7 +24,7 @@ class Sampler {

   /// Resets the `Sampler`'s internal state.
   /// Typically called before a new epoch.
-  /// Optionally, accepts a new size when reseting the sampler.
+  /// Optionally, accepts a new size when resetting the sampler.
   virtual void reset(std::optional<size_t> new_size) = 0;

   /// Returns the next index if possible, or an empty optional if the
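A brief usage sketch of this contract: reset at each epoch boundary, then drain next() until it returns an empty optional (RandomSampler shown; the names are the standard torch::data::samplers ones).

#include <torch/torch.h>

void epochs() {
  torch::data::samplers::RandomSampler sampler(/*size=*/1000);
  for (int epoch = 0; epoch < 3; ++epoch) {
    // Reset before each epoch; passing a size instead of nullopt would
    // re-size the sampler for a grown or shrunk dataset.
    sampler.reset(std::nullopt);
    // next() yields batch requests (index vectors) until exhausted.
    while (auto indices = sampler.next(/*batch_size=*/64)) {
      // indices->size() <= 64; hand these to the stateless dataset.
    }
  }
}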
@@ -344,7 +344,7 @@ namespace detail {
 inline Tensor glu(const Tensor& input, int64_t dim) {
   TORCH_CHECK(
       input.dim() != 0,
-      "glu does not suppport scalars because halving size must be even");
+      "glu does not support scalars because halving size must be even");
   return torch::glu(input, dim);
 }
 } // namespace detail
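For reference, the evenness constraint in practice; a small example, not part of the patch:

#include <torch/torch.h>

void demo() {
  auto x = torch::randn({4, 6});
  // glu splits dim 1 into halves a and b and computes a * sigmoid(b).
  auto y = torch::glu(x, /*dim=*/1); // shape [4, 3]
  // A 0-dim (scalar) input trips the TORCH_CHECK above: there is no
  // dimension to halve.
}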
@@ -130,7 +130,7 @@ class ModuleDictImpl : public Cloneable<ModuleDictImpl> {
     return modules_.is_empty();
   }

-  /// Check if the centain parameter with the key in the `ModuleDict`.
+  /// Check if the certain parameter with the key in the `ModuleDict`.
   bool contains(const std::string& key) const noexcept {
     return modules_.contains(key);
   }
@@ -107,7 +107,7 @@ class ParameterDictImpl : public Cloneable<ParameterDictImpl> {
     parameters_.clear();
   }

-  /// Check if the centain parameter with the key in the ParameterDict
+  /// Check if the certain parameter with the key in the ParameterDict
   bool contains(const std::string& key) const noexcept {
     return parameters_.contains(key);
   }
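In both containers, contains is a plain key-presence check; a hedged ParameterDict sketch (insert and contains are the usual members, but treat this as illustrative):

#include <torch/torch.h>

void keys() {
  torch::nn::ParameterDict dict;
  dict->insert("weight", torch::randn({4, 4}));
  bool has_weight = dict->contains("weight"); // true
  bool has_bias = dict->contains("bias");     // false
}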
@@ -101,7 +101,7 @@ class TORCH_API InputArchive final {
   std::vector<std::string> keys();

   /// Forwards all arguments to `read()`.
-  /// Useful for generic code that can be re-used for both `InputArchive` and
+  /// Useful for generic code that can be reused for both `InputArchive` and
   /// `OutputArchive` (where `operator()` forwards to `write()`).
   template <typename... Ts>
   void operator()(Ts&&... ts) {
@@ -66,7 +66,7 @@ class TORCH_API OutputArchive final {
   void save_to(const std::function<size_t(const void*, size_t)>& func);

   /// Forwards all arguments to `write()`.
-  /// Useful for generic code that can be re-used for both `OutputArchive` and
+  /// Useful for generic code that can be reused for both `OutputArchive` and
   /// `InputArchive` (where `operator()` forwards to `read()`).
   template <typename... Ts>
   void operator()(Ts&&... ts) {
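The symmetric operator() is what lets one template either serialize or deserialize depending on the archive type it is handed; a rough sketch:

#include <torch/torch.h>

// Compiles against both archive types: operator() forwards to write() on an
// OutputArchive and to read() on an InputArchive.
template <typename Archive>
void process(Archive& archive, torch::Tensor& tensor) {
  archive("my_tensor", tensor);
}

void roundtrip() {
  torch::Tensor t = torch::ones({2, 2});
  torch::serialize::OutputArchive out;
  process(out, t); // writes
  out.save_to("tensor.pt");

  torch::Tensor loaded;
  torch::serialize::InputArchive in;
  in.load_from("tensor.pt");
  process(in, loaded); // reads
}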
@@ -19,7 +19,7 @@ TransformerEncoderLayerImpl::TransformerEncoderLayerImpl(

 void TransformerEncoderLayerImpl::reset() {
   // NOTE: reset() is for initializing the model only, calling reset() after the
-  // model is created will throw exceptionss. Call reset_parameter() if the
+  // model is created will throw exceptions. Call reset_parameter() if the
   // created model needs a reset

   self_attn = this->register_module(
@@ -2904,7 +2904,7 @@ Tensor softplus_double_backward(
 // 4. Return the as_strided view of the storage tensor using input geometry.
 //
 // See NOTE [ Detecting Memory Overlap Within A Strided Tensor ] on how to
-// roughly detech overlapping memory.
+// roughly detect overlapping memory.

 // NOTE [ Detecting Memory Overlap Within A Strided Tensor ]
 //
@@ -2994,7 +2994,7 @@ Tensor softplus_double_backward(
 // Now that we established the above claim (***), we consider the
 // view operation as first sorting the dimensions (i.e., blocks),
 // apply the original view (since it only cares dimensions being
-// consecutive and contiguous withtin each block), and then undo
+// consecutive and contiguous within each block), and then undo
 // the sort.
 //
 // Consider a single block B in the output,
@@ -3046,7 +3046,7 @@ Tensor softplus_double_backward(
 //           size'[i] <= floor(size[i] / k)
 //
 // If size'[i] = 1, invariant is obviously satisfied as we are
-// just removing a dimension (afte step (1)).
+// just removing a dimension (after step (1)).
 //
 // Assume size'[i] > 1.
 //
@@ -5244,7 +5244,7 @@ bool any_variable_defined(const variable_list& variables) {
 // Derivations for the householder_product.backward method.
 //
 // Given a sequence of vectors v_1, ..., v_n and a sequence of scalars tau_1,
-// ..., tau_k, the torch.linalg.householder_product computes the firt n columns
+// ..., tau_k, the torch.linalg.householder_product computes the first n columns
 // of the following product: Q = (I - tau_1 v_1 v_1^H) ... (I - tau_k v_k
 // v_k^H). Let
 // H_i(sigma) := I - sigma v_i v_i^H, so Q = (H_1(sigma_1) ...
@@ -5648,7 +5648,7 @@ std::tuple<Tensor, Tensor, Tensor> ormqr_backward(
 // left = false and transpose = true is very much similar with just
 // transposed arguments passed into householder_product_backward.
 // Ormqr computes B = H_1 * ... * H_k * A.
-// The sensivity wrt H_i is given by (see notes in
+// The sensitivity wrt H_i is given by (see notes in
 // householder_product_backward) Tr(H_i_plus B B_grad^H H_i_minus dH_i),
 // so, since householder_product_backward respects `for i in range(k)`, we
 // could reuse householder_product_backward with
@@ -278,7 +278,7 @@ static void general_trace_function(
         tracer::addOutput(node, iter->toTensorList());
       } else {
         throw std::runtime_error(
-            "unsupported ouptut list type: " + elem_type->str());
+            "unsupported output list type: " + elem_type->str());
       }
     } else if (type->kind() == TypeKind::ClassType) {
       AT_ASSERT(iter->isObject());
@@ -30,7 +30,7 @@ struct TORCH_API AnomalyMode {
 ///
 /// Anomaly detection mode is useful for debugging problems happening
 /// in the backward, such as unexpectedly modified tensors or NaNs
-/// occuring in the backward.
+/// occurring in the backward.
 ///
 /// The enabling of anomaly mode is global - as soon as there is one
 /// such guard, it is enabled for all computation and threads. It also
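A quick sketch of the guard-based usage this comment refers to, assuming the DetectAnomalyGuard RAII type from this header:

#include <torch/torch.h>

void debug_backward() {
  auto x = torch::randn({3}, torch::requires_grad());
  {
    // Enables anomaly detection globally for the guard's lifetime.
    torch::autograd::DetectAnomalyGuard guard;
    auto y = (x * x).sum();
    y.backward(); // NaNs or errors in backward now come with extra context
  } // anomaly detection ends when the guard is destroyed
}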
@@ -53,7 +53,7 @@ using at::Tensor;
 //
 // This layout constraint is ensured in the `set_fw_grad` function below

-// More complex cases arrise when non-dual Tensor interact with dual Tensors.
+// More complex cases arise when non-dual Tensor interact with dual Tensors.
 // The two most important cases are:
 //
 // # Have:
@@ -222,7 +222,7 @@ void AutogradMeta::set_fw_grad(
       if (utils::has_same_meta(new_grad, base) &&
           utils::has_same_meta(new_grad, self)) {
         // TODO extend this special case to when the underlying storage of
-        // new_grad can be re-used.
+        // new_grad can be reused.
         new_base_fw_grad = new_grad;
       } else {
         new_base_fw_grad =
@@ -611,7 +611,7 @@ auto Engine::thread_main(const std::shared_ptr<GraphTask>& graph_task) -> void {
     }
   }

-  // Reentrant call will re-use the graph_task's owner thread ready_queue for
+  // Reentrant call will reuse the graph_task's owner thread ready_queue for
   // queueing tasks (NOTE: this is not true in the async_mode of the engine).
   // While we can create separate ready queue for each new reentrant
   // thread, but sharing the same cpu_ready_queue with parent thread is a
@@ -1228,7 +1228,7 @@ void Engine::evaluate_function(
 }

 static uint64_t compute_min_topological_nr(const edge_list& outputs) {
-  // Computes the mininum topological number among all the outputs
+  // Computes the minimum topological number among all the outputs
   if (outputs.empty()) {
     return 0;
   }
@@ -27,7 +27,7 @@ struct ForwardGrad;
 // - Ensure that we can keep the level that we expose to the user API simple
 //   (an integer
 //   that represents the nesting depth) while avoiding confusions when the
-//   level index is re-used.
+//   level index is reused.

 // The important external APIs from this file are:
 // - ForwardADLevel::get_next_idx() that can be used to enter a new level and
@@ -67,7 +67,7 @@ TORCH_API std::shared_ptr<Node> get_current_node();
 // or more input `Variable`s and producing zero or more output `Variable`s. All
 // functions in PyTorch's autograd machinery derive from this class and
 // override its `apply` method. Instances of such subclasses will then be
-// invokable via the call operator.
+// invocable via the call operator.
 //
 //                    Nodes in the Autograd Graph
 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -592,7 +592,7 @@ struct TORCH_API Node : std::enable_shared_from_this<Node> {
   // 1) Extract tensors/symint args
   // 2) Collect node information for specialization and caching
   // Implementations in subclasses should call args.collect() with all node
-  // attrs. These functions are only called durring backward.
+  // attrs. These functions are only called during backward.
   virtual void compiled_args(CompiledNodeArgs& args) const {
     TORCH_CHECK_NOT_IMPLEMENTED(
         false, std::string("compiled_args not implemented: ") + name());
@@ -21,7 +21,7 @@ variable_list Error::apply(variable_list&& inputs) const {
 }

 void Error::compiled_args(CompiledNodeArgs& args) const {
-  // throw the error durring collect, the graph won't get compiled
+  // throw the error during collect, the graph won't get compiled
   apply(variable_list());
 }

@@ -605,7 +605,7 @@ static PyObject* set_autocast_enabled(
   HANDLE_TH_ERRORS
   static PythonArgParser parser(
       {"set_autocast_enabled(std::string_view device_type, bool enabled)",
-       "set_autocast_enabled(bool enabled)"}); // this signature is depracated.
+       "set_autocast_enabled(bool enabled)"}); // this signature is deprecated.
   ParsedArgs<2> parsed_args;
   auto r = parser.parse(args, kwargs, parsed_args);
   // Set at::kCUDA as default value to prevent BC-breaking changes.
@@ -628,7 +628,7 @@ static PyObject* is_autocast_enabled(
   HANDLE_TH_ERRORS
   static PythonArgParser parser(
       {"is_autocast_enabled(std::string_view device_type)",
-       "is_autocast_enabled()"}); // this signature is depracated.
+       "is_autocast_enabled()"}); // this signature is deprecated.
   ParsedArgs<1> parsed_args;
   auto r = parser.parse(args, kwargs, parsed_args);
   // Set at::kCUDA as default value to prevent BC-breaking changes.
@@ -622,7 +622,7 @@ void prepareProfiler(
   /*
    * Sending a warning and passing the non-standard event to the backend
    * Backend can abort if the event is not supported.
-   * TODO Should we gracefully drop the invalid event if we have atleast one
+   * TODO Should we gracefully drop the invalid event if we have at least one
    * valid?
    */
   auto is_standard_event = [](const std::string& event) -> bool {
@@ -186,7 +186,7 @@ struct TORCH_CUDA_CPP_API CUDAPluggableAllocator
   std::function<void(int, c10::cuda::MempoolId_t)> end_allocate_to_pool_fn_;
   std::function<void(int, c10::cuda::MempoolId_t)> relase_pool_fn_;
   std::mutex allocator_mutex_;
-  // We do the bookeeping here in order to simplify custom allocators
+  // We do the bookkeeping here in order to simplify custom allocators
   std::unordered_map<void*, _AllocationMetadata> allocation_metadata_;

   bool initialized_ = false;
@@ -21,7 +21,7 @@ std::string cuGDSFileGetErrorString(T status) {
       : std::string(c10::utils::str_error(errno));
 }

-// To get error message for Buf/Handle registeration APIs that return
+// To get error message for Buf/Handle registration APIs that return
 // CUfileError_t
 template <
     class T,
@@ -1,2 +1,2 @@
-# torch::deploy has been moved to pytorch/multipy
-Please check out [https://github.com/pytorch/multipy](https://github.com/pytorch/multipy) to find the new home for torch::deploy.
+# torch::deploy has been moved to pytorch/multipy <!-- codespell:ignore -->
+Please check out [https://github.com/pytorch/multipy](https://github.com/pytorch/multipy) to find the new home for torch::deploy. <!-- codespell:ignore -->
@@ -98,7 +98,7 @@ struct TORCH_API PyCompilerGuard {
 // including torch/csrc/autograd/engine.h breaks BC by somehow introducing
 // symbol resolution issues. Instead requiring downstream users to include
 // engine.h to access collect_input_metadata, we provide it here (with a
-// different name to avoid ambigous symbols...)
+// different name to avoid ambiguous symbols...)
 TORCH_API std::vector<std::optional<InputMetadata>> get_input_metadata(
     const edge_list& edges);

@@ -1068,7 +1068,7 @@ class SwapSavedVariables {
 // (e.g. MulBackward0_apply_functional). Compiled Autograd's initial graph
 // capture wants to take a variant of this function and proxy it into the graph.
 // Every autograd node defines an apply_with_saved function, that when invoked,
-// proxys a call to a function into the Compiled Autograd graph.
+// proxies a call to a function into the Compiled Autograd graph.
 //
 // Some requirements that we have are:
 // - The proxy'ed function must have inputs that are FX-graphable types.
@@ -274,7 +274,7 @@ PyObject* dynamo__custom_eval_frame(
     // NB: We could use extract_cache_entry to get the cache_entry, but
     // extract_cache_entry returns a borrowed reference. Modifying a borrowed
     // reference seems wrong. Therefore, we directly access the
-    // extra->cache_entry. extra wont be NULL here.
+    // extra->cache_entry. extra won't be NULL here.
     CacheEntry* new_cache_entry =
         create_cache_entry(extra, guarded_code, backend);

@@ -132,7 +132,7 @@ void destroy_extra_state(void* obj);
 // Clears the existing object sitting on the extra scratch spance and sets it
 // up with the new state. Note that _PyCode_SetExtra calls the
 // destroy_extra_state deleter internally, and therefore we don't call it
-// explicity here.
+// explicitly here.

 // Ownership contract
 // args
@@ -148,7 +148,7 @@ void destroy_extra_state(void* obj);
 // scratch space.
 void set_extra_state(PyCodeObject* code, ExtraState* extra_state);

-// Creates a new extra state and put it on the extra scrach space of the code
+// Creates a new extra state and put it on the extra scratch space of the code
 // object.

 // Ownership contract
@@ -60,7 +60,7 @@ typedef struct {
   PyTupleObject* it_seq; /* Set to NULL when iterator is exhausted */
 } _PyTupleIterObject;

-// Copied from CPython, and given a unified name for different Python verions.
+// Copied from CPython, and given a unified name for different Python versions.
 // https://github.com/python/cpython/blob/7f71003b222ad398713514c2b55d34dc05dba6bc/Objects/rangeobject.c#L765-L771
 typedef struct {
   PyObject_HEAD
@@ -124,7 +124,7 @@ TensorCheck::TensorCheck(
 // See note in guards.py [Note - On Export Tensor Guards]
 // Logic parallel to here must be maintained in python
 bool TensorCheck::check(const LocalState& state, const at::Tensor& v) {
-  // In terms of a sparse_csr tensor, it does not support strides informatio
+  // In terms of a sparse_csr tensor, it does not support strides information
   c10::SymIntArrayRef sym_strides(std::vector<SymInt>(v.ndimension(), -1));
   bool does_not_support_stride = v.layout() == c10::kSparseCsr ||
       v.layout() == c10::kSparseCsc || v.layout() == c10::kSparseBsc ||
@@ -2407,7 +2407,7 @@ class GuardAccessor {
  * value passed to the check function to call the check function of the child
  * guard manager.
  *
- * Performace optimization for fail fast - An optimization for runtime here is
+ * Performance optimization for fail fast - An optimization for runtime here is
  * to sort the execution of child guards depending on the failure count. This
  * ensures that we run the guards that are more prone to fail statistically
  * first. This can improve the cache lookup time when we have multiple cache
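The fail-fast reordering in isolation — a self-contained, hedged sketch (not the actual GuardManager code): children are kept sorted by observed failure count so the statistically likeliest-to-fail guard runs first.

#include <algorithm>
#include <cstdint>
#include <memory>
#include <vector>

struct Guard {
  virtual ~Guard() = default;
  virtual bool check() = 0;
  uint64_t fail_count = 0;
};

// True only if every child passes. On a failure, bump that child's count and
// re-sort so frequently failing guards are tried first next time.
inline bool check_all(std::vector<std::unique_ptr<Guard>>& children) {
  for (auto& child : children) {
    if (!child->check()) {
      ++child->fail_count;
      std::stable_sort(
          children.begin(),
          children.end(),
          [](const auto& a, const auto& b) {
            return a->fail_count > b->fail_count;
          });
      return false; // fail fast: skip the remaining guards
    }
  }
  return true;
}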
@@ -2831,7 +2831,7 @@ class RootGuardManager : public GuardManager {
   template <typename T>
   bool check_nopybind_template(T* value) { // borrowed ref
     // Check [Note on GIL interaction with mutex lock] for details on why we
-    // need mutex and its interactions wth GIL.
+    // need mutex and its interactions with GIL.
     PyThreadState* _save = nullptr;
     Py_UNBLOCK_THREADS; // ; is added to avoid clang-formatting
     std::lock_guard<std::mutex> lock_guard(_lock);
@@ -2889,7 +2889,7 @@ class RootGuardManager : public GuardManager {
   GuardDebugInfo check_verbose_nopybind(
       PyObject* value) override { // borrowed ref
     // Check [Note on GIL interaction with mutex lock] for details on why we
-    // need mutex and its interactions wth GIL.
+    // need mutex and its interactions with GIL.
     PyThreadState* _save = nullptr;
     Py_UNBLOCK_THREADS; // ; is added to avoid clang-formatting
     std::lock_guard<std::mutex> lock_guard(_lock);
@@ -2992,7 +2992,7 @@ class RootGuardManager : public GuardManager {
   LocalState _local_state;

  private:
-  // All the relational guards under this guard mananger. We only use these
+  // All the relational guards under this guard manager. We only use these
   // when the guard evaluates to False. This ensures that guard state is reset
   // on guard failure so that next invocation is clean.
   std::vector<std::shared_ptr<RelationalGuard>> _relational_guard_resetters;
@@ -3575,7 +3575,7 @@ class TENSOR_MATCH : public LeafGuard {
 };

 /**
- * Represents __getattr__ acccessor.
+ * Represents __getattr__ accessor.
  */
 class GetAttrGuardAccessor : public GuardAccessor {
  public:
@@ -3623,7 +3623,7 @@ class GetAttrGuardAccessor : public GuardAccessor {
   }

   std::string repr() const override {
-    // Helpful when priting GuardManager tree structure.
+    // Helpful when printing GuardManager tree structure.
     return "GetAttrGuardAccessor(" + py::str(_attr_name).cast<std::string>() +
         ")";
   }
@@ -3651,7 +3651,7 @@ class GetAttrGuardAccessor : public GuardAccessor {
 };

 /**
- * Represents object.__getattribute__(obj, attr_name) acccessor.
+ * Represents object.__getattribute__(obj, attr_name) accessor.
  */
 class GenericGetAttrGuardAccessor : public GuardAccessor {
  public:
@@ -3699,7 +3699,7 @@ class GenericGetAttrGuardAccessor : public GuardAccessor {
   }

   std::string repr() const override {
-    // Helpful when priting GuardManager tree structure.
+    // Helpful when printing GuardManager tree structure.
     return "GenericGetAttrGuardAccessor(" +
         py::str(_attr_name).cast<std::string>() + ")";
   }
@@ -3730,7 +3730,7 @@ class GenericGetAttrGuardAccessor : public GuardAccessor {
 };

 /**
- * Represents x.__dict__ acccessor.
+ * Represents x.__dict__ accessor.
  */
 class GetGenericDictGuardAccessor : public GuardAccessor {
  public:
@@ -3777,7 +3777,7 @@ class GetGenericDictGuardAccessor : public GuardAccessor {
   }

   std::string repr() const override {
-    // Helpful when priting GuardManager tree structure.
+    // Helpful when printing GuardManager tree structure.
     return "GetGenericDictGuardAccessor";
   }

@@ -3798,7 +3798,7 @@ class GetGenericDictGuardAccessor : public GuardAccessor {
 };

 /**
- * Represents __getitem__ acccessor.
+ * Represents __getitem__ accessor.
  */
 class GetItemGuardAccessor : public GuardAccessor {
  public:
@@ -3995,7 +3995,7 @@ class FrameLocalsGuardAccessor : public GuardAccessor {
 };

 /**
- * Represents dict[name] acccessor. Needed since DictGuardManager does not
+ * Represents dict[name] accessor. Needed since DictGuardManager does not
  * support sorting. We differentiate it from GetItemGuardAccessor because
  * PyDict_GetItem should be faster than PyObject_GetItem.
  */
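The speed claim rests on CPython C-API semantics: PyDict_GetItem is a dict-specialized lookup that returns a borrowed reference, while PyObject_GetItem dispatches through the generic mapping protocol and returns a new reference. Roughly:

#include <Python.h>

// Illustration only; not the accessor's actual code.
bool lookup(PyObject* dict, PyObject* key) {
  // Borrowed reference; returns NULL on a missing key without setting an
  // exception. Valid only while `dict` keeps the entry alive.
  PyObject* borrowed = PyDict_GetItem(dict, key);

  // New reference; raises KeyError on a missing key. Caller must DECREF.
  PyObject* owned = PyObject_GetItem(dict, key);
  if (owned == nullptr) {
    PyErr_Clear();
  } else {
    Py_DECREF(owned);
  }
  return borrowed != nullptr;
}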
@@ -4023,7 +4023,7 @@ class DictGetItemGuardAccessor : public GuardAccessor {
         _guard_manager->has_no_accessors()) {
       // immutable object and dict tag matches, we can skip the guard subtree.
       // NB: We only skip the subtree if there are no accessors in the subtree.
-      // This is specificallly for tensors which are used in symbolic shape C++
+      // This is specifically for tensors which are used in symbolic shape C++
       // guards, and therefore have accessors on the tensor GuardManager itself.
       return true;
     }
@@ -4244,7 +4244,7 @@ std::string to_string(TensorProperty prop) {
 }

 /**
- * Represents tensor.size/shape/storage_offset acccessor.
+ * Represents tensor.size/shape/storage_offset accessor.
  */
 template <TensorProperty _prop>
 class TensorPropertyGuardAccessor : public GuardAccessor {
@@ -4342,7 +4342,7 @@ class TensorPropertyGuardAccessor : public GuardAccessor {
   }

   std::string repr() const override {
-    // Helpful when priting GuardManager tree structure.
+    // Helpful when printing GuardManager tree structure.
     return "TensorPropertyGuardAccessor<" + to_string(_prop) + +">(" +
         std::to_string(_index) + ")";
   }
@@ -4434,7 +4434,7 @@ class IndexedGuardAccessor : public GuardAccessor {
 };

 /**
- * Represents tensor.grad acccessor.
+ * Represents tensor.grad accessor.
  */
 class GradGuardAccessor : public GuardAccessor {
  public:
@@ -4485,7 +4485,7 @@ class GradGuardAccessor : public GuardAccessor {
   }

   std::string repr() const override {
-    // Helpful when priting GuardManager tree structure.
+    // Helpful when printing GuardManager tree structure.
     return "GradGuardAccessor(grad)";
   }

@@ -4654,7 +4654,7 @@ class FuncKwDefaultsGuardAccessor : public GuardAccessor {
 };

 /**
- * Represents f_globals acccessor. This sits as a child accessor of the
+ * Represents f_globals accessor. This sits as a child accessor of the
  * RootGuardManager.
  */
 class GlobalsGuardAccessor : public GuardAccessor {
@@ -4847,7 +4847,7 @@ class TupleIteratorGetItemAccessor : public GuardAccessor {
  * GlobalWeakRef accessor. Dynamo can insert a weakref object into the frame
  * globals. This accessor reads the globals and then calls the weakref object
  * to get the underlying object. This is a child of GlobalsGuardAccessor.
- * Therefore, we will get the globals dict while caling check_nopybind.
+ * Therefore, we will get the globals dict while calling check_nopybind.
  */
 class GlobalWeakRefGuardAccessor : public GuardAccessor {
  public:
@@ -5207,7 +5207,7 @@ void install_object_aliasing_guard(
   std::shared_ptr<RelationalGuard> guard =
       std::make_shared<OBJECT_ALIASING>(std::move(verbose_code_parts));

-  // Register the resetter on the root guard mananger, so that it can reset
+  // Register the resetter on the root guard manager, so that it can reset
   // the newly added relational guard when the guard eval fails.
   x->get_root()->add_relational_guard_resetter(guard);

@@ -5227,7 +5227,7 @@ void install_no_tensor_aliasing_guard(
   std::shared_ptr<RelationalGuard> guard = std::make_shared<NO_TENSOR_ALIASING>(
       tensor_names, std::move(verbose_code_parts));

-  // Register the resetter on the root guard mananger, so that it can reset
+  // Register the resetter on the root guard manager, so that it can reset
   // the newly added relational guard when the guard eval fails.
   py::cast<GuardManager*>(guard_managers[0])
       ->get_root()
@@ -5255,7 +5255,7 @@ void install_symbolic_shape_guard(
       std::move(py_addr_keep_alive),
       std::move(verbose_code_parts));

-  // Register the resetter on the root guard mananger, so that it can reset
+  // Register the resetter on the root guard manager, so that it can reset
   // the newly added relational guard when the guard eval fails.
   py::cast<GuardManager*>(guard_managers[0])
       ->get_root()
@@ -6309,7 +6309,7 @@ PyObject* torch_c_dynamo_guards_init() {
             self.add_permitted_leaf_guard(std::make_shared<NO_HASATTR>(
                 std::move(attr_name), std::move(verbose_code_parts)));
           })
-      // Not permitted accesssors
+      // Not permitted accessors
       .def("lambda_manager", &DictGuardManager::fail_on_get_child_manager)
       .def("getitem_manager", &DictGuardManager::fail_on_get_child_manager)
       .def("dict_getitem_manager", &DictGuardManager::fail_on_get_child_manager)
@@ -110,7 +110,7 @@ std::vector<ParameterMetadata> unpack_input_parameters(
     }

     if (stack[idx].isScalar()) {
-      // Beyond c10::Scalar, the floating value and interger value are also
+      // Beyond c10::Scalar, the floating value and integer value are also
       // represented as Scalar.
       inputs_metadata.emplace_back(stack[idx].toScalar(), arg_order);
     } else if (stack[idx].isTensorList()) {
@@ -528,7 +528,7 @@ std::string AOTIPythonKernelHolder::produce_aoti_kernel_lib(
   auto kernel_lib_path = py::cast<std::string>(result);
   TORCH_CHECK(
       !kernel_lib_path.empty(),
-      "Failed to produce kernel libarary by using AOTI for ",
+      "Failed to produce kernel library by using AOTI for ",
       c10::DeviceTypeName(device_.type()),
       ". Operator Name is ",
       op.operator_name().name,
@@ -121,8 +121,8 @@ TORCH_API std::unordered_map<std::string, CreateAOTIModelRunnerFunc>&
 getAOTIModelRunnerRegistry();

 // To register a new external backend in AOTI one needs to create an instance of
-// this struct. It is not thread-safe. Becase it is expected to be called during
-// the initialization of the program.
+// this struct. It is not thread-safe. Because it is expected to be called
+// during the initialization of the program.
 struct TORCH_API RegisterAOTIModelRunner{RegisterAOTIModelRunner(
     const std::string& name,
     CreateAOTIModelRunnerFunc create_aoti_model_runner_fn){
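The intended pattern is static registration during program initialization, which is why thread-safety is not needed; a hedged sketch in which the backend name and factory are hypothetical, and the factory's parameter list is an assumption to be checked against the CreateAOTIModelRunnerFunc alias next to getAOTIModelRunnerRegistry():

// Hypothetical factory for a custom backend. Assumption: the alias mirrors
// AOTIModelContainerRunner's constructor arguments; consult the header for
// the authoritative signature.
std::unique_ptr<torch::inductor::AOTIModelContainerRunner>
create_my_backend_runner(
    const std::string& model_so_path,
    size_t num_models,
    const std::string& device_str,
    const std::string& bin_dir);

// A namespace-scope instance runs this constructor before main(), adding the
// factory to getAOTIModelRunnerRegistry() under the (illustrative) name.
static torch::inductor::RegisterAOTIModelRunner register_my_backend(
    "my_backend", &create_my_backend_runner);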
@@ -659,7 +659,7 @@ class AOTInductorModelBase {
     AOTI_RUNTIME_CHECK(
         reinterpret_cast<uint64_t*>(
             self_mmap + weights_size - sizeof(uint64_t))[0] == magic_number,
-        "Weigths data seems corrupt");
+        "Weights data seems corrupt");
     return self_mmap;
 #endif
   }
@@ -707,7 +707,7 @@ class AOTInductorModelBase {
   bool include_weights;

   // Record if the model finishes an inference run so that its owning
-  // AOTModelContainer can re-use this instance.
+  // AOTModelContainer can reuse this instance.
 #ifdef USE_CUDA
   std::optional<cudaEvent_t> run_finished_;
 #elif defined(USE_XPU)
@@ -18,7 +18,7 @@ namespace torch::aot_inductor {
 // when model_container is created and no constants are being loaded or updated.
 // (2) INITIALIZED state: This state get set whenever we load the constants into
 // the buffer. This could be done by load_constants or update_constants_buffer.
-// (3) FOLDED state: This state should transition from INITIALILZED after
+// (3) FOLDED state: This state should transition from INITIALIZED after
 // const_fold is being invoked.
 enum class ConstantState : uint8_t { NONE, INITIALIZED, FOLDED, UNKNOWN };

@@ -872,7 +872,7 @@ void OSSProxyExecutor::call_function(
       auto serialized_int_value = flatten_int_args[int_id++];
       TORCH_CHECK(
           returned_int_value == serialized_int_value,
-          "Expect returned int value to match the serialized int value, but got retured int value: ",
+          "Expect returned int value to match the serialized int value, but got returned int value: ",
           returned_int_value,
           " and serialized int value: ",
           serialized_int_value);