Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "[7/N] Fix Wextra-semi warning (#140225)"
This reverts commit ffb979032dc149b4c895526fe5b92d713ed7b1e1. Reverted https://github.com/pytorch/pytorch/pull/140225 on behalf of https://github.com/kit1980 due to breaking internal builds ([comment](https://github.com/pytorch/pytorch/pull/140225#issuecomment-2469312229))
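For context, the warning this PR series targets, `-Wextra-semi`, flags redundant semicolons, which in this codebase typically come from macro invocations whose expansion already ends in `;`. A minimal sketch of the pattern, assuming a hypothetical `DEFINE_FLAG` macro standing in for definitions like `C10_DEFINE_bool` (this is illustrative only, not the real C10 macro):

```cpp
// Illustrative sketch only: DEFINE_FLAG is a hypothetical macro whose expansion
// already ends in ';', so a trailing ';' at the call site is redundant.
#define DEFINE_FLAG(name, default_value) \
  bool FLAGS_##name = (default_value);

DEFINE_FLAG(enable_feature, true);  // expands to a doubled ';' -> -Wextra-semi may fire
DEFINE_FLAG(enable_other, false)    // no trailing ';' -> no redundant semicolon

int main() {
  return (FLAGS_enable_feature && !FLAGS_enable_other) ? 0 : 1;
}
```

As the diff below shows, the reverted PR had dropped the trailing semicolons at such call sites (and listed the macros under `StatementMacros` in `.clang-format`); this revert restores the semicolons and removes those `.clang-format` entries.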
@@ -101,16 +101,9 @@ SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++17
StatementMacros:
- C10_DEFINE_bool
- C10_DEFINE_int
- C10_DEFINE_int32
- C10_DEFINE_int64
- C10_DEFINE_string
- PyObject_HEAD
- PyObject_VAR_HEAD
- PyException_HEAD
- DEFINE_BINARY

TabWidth: 8
UseTab: Never
---
@@ -1594,8 +1594,8 @@ inline std::tuple<Vectorized<float>, Vectorized<float>> convert_##name##_float(c
inline Vectorized<type> convert_float_##name(const Vectorized<float>& a, const Vectorized<float>& b) { \
return cvt_from_fp32<type>(__m512(a), __m512(b)); \
}
CONVERT_VECTORIZED_INIT(BFloat16, bfloat16)
CONVERT_VECTORIZED_INIT(Half, half)
CONVERT_VECTORIZED_INIT(BFloat16, bfloat16);
CONVERT_VECTORIZED_INIT(Half, half);

#else //defined(CPU_CAPABILITY_AVX512)

@@ -1624,8 +1624,8 @@ inline Vectorized<type> convert_float_##name(const Vectorized<float>& a, const V
} \
return Vectorized<type>::loadu(arr2); \
}
CONVERT_NON_VECTORIZED_INIT(BFloat16, bfloat16)
CONVERT_NON_VECTORIZED_INIT(Half, half)
CONVERT_NON_VECTORIZED_INIT(BFloat16, bfloat16);
CONVERT_NON_VECTORIZED_INIT(Half, half);

#endif // defined(CPU_CAPABILITY_AVX512)

@@ -1663,8 +1663,8 @@ inline void load_fp32_from_##name(const type *data, Vectorized<float>& out1, Vec
data += Vectorized<float>::size(); \
load_fp32_from_##name(data, out2); \
}
LOAD_FP32_NON_VECTORIZED_INIT(BFloat16, bf16)
LOAD_FP32_NON_VECTORIZED_INIT(Half, fp16)
LOAD_FP32_NON_VECTORIZED_INIT(BFloat16, bf16);
LOAD_FP32_NON_VECTORIZED_INIT(Half, fp16);

#endif
}}}
@@ -61,6 +61,7 @@ bool SymInt::has_hint() const {
} \
}

// clang-format off
DEFINE_BINARY(operator+, std::plus<>(), add, SymInt)
DEFINE_BINARY(operator-, std::minus<>(), sub, SymInt)
DEFINE_BINARY(operator*, std::multiplies<>(), mul, SymInt)
@@ -74,6 +75,7 @@ DEFINE_BINARY(sym_gt, std::greater<>(), gt, SymBool)
DEFINE_BINARY(sym_ge, std::greater_equal<>(), ge, SymBool)
DEFINE_BINARY(min, std::min, sym_min, SymInt)
DEFINE_BINARY(max, std::max, sym_max, SymInt)
// clang-format on

SymInt::operator SymFloat() const {
if (auto ma = maybe_as_int()) {
@@ -18,7 +18,7 @@

namespace at {
struct Quantizer;
}
};

namespace torch { namespace autograd {

@@ -54,6 +54,6 @@ namespace VariableType {
const at::Tensor & unpack(const Tensor & t, const char * name, int pos);
at::Tensor unpack_opt(const Tensor & t, const char * name, int pos);
std::vector<at::Tensor> unpack(const at::ITensorListRef& tl, const char *name, int pos);
}
};

}} // namespace torch::autograd
@@ -21,7 +21,7 @@ std::unordered_set<rpc::worker_id_t> DistAutogradContext::getKnownWorkerIds()
const {
std::lock_guard<std::mutex> guard(lock_);
return knownWorkerIds_;
}
};

void DistAutogradContext::addKnownWorkerId(const rpc::worker_id_t workerId) {
std::lock_guard<std::mutex> guard(lock_);
@@ -103,7 +103,7 @@ class TORCH_API Reducer {
// been applied.
void set_optimizer_in_backward() {
optim_in_backward_ = true;
}
};

// Runs allreduce or installed communication hook given GradBucket instance.
c10::intrusive_ptr<c10::ivalue::Future> run_comm_hook(
@@ -16,7 +16,7 @@
C10_DEFINE_bool(
torch_jit_do_not_store_optimized_graph,
false,
"Do not store the optimized graph.")
"Do not store the optimized graph.");

namespace torch::jit {
namespace {
@@ -133,8 +133,8 @@ GraphFunction::SpecializationKey GraphFunction::currentSpecialization() const {
void preoptimizeGraph(std::shared_ptr<Graph>& graph, bool disable_autocast) {
Inline(*graph);

// Peephole Optimize cleans up many "is None" checks and creates constant
// prop opportunities
// Peephole Optimize cleans up many "is None" checks and creates constant prop
// opportunities
PeepholeOptimize(graph, true);

// AliasDb construction can be slow, so run it just on immutable types
@@ -6,7 +6,7 @@
namespace torch::jit {

struct ChunkOutput {
ChunkOutput(Value* v, size_t o) : val(v), offset(o) {}
ChunkOutput(Value* v, size_t o) : val(v), offset(o){};
Value* val;
size_t offset;
};
@@ -29,12 +29,12 @@
C10_DEFINE_bool(
torch_jit_disable_cat,
false,
"disable aten::cat in TE fusion groups")
"disable aten::cat in TE fusion groups");

C10_DEFINE_bool(
torch_jit_enable_dynamic_shape_fusion,
false,
"enable TE fusion using dynamic shapes")
"enable TE fusion using dynamic shapes");

namespace torch::jit {

@@ -82,8 +82,9 @@ static const OperatorSet& supported_non_eltwise_set() {
"aten::_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor",
"aten::matmul(Tensor self, Tensor other) -> Tensor",
};
// clang-format on
return supported_non_eltwise_set;
}
};

bool isSupported(Node* node) {
// For Block codegen we allow limited ops.
@@ -101,6 +102,7 @@ bool isSupported(Node* node) {
"aten::cat(Tensor[] tensors, int dim=0) -> Tensor",
"aten::unsqueeze(Tensor(a) self, int dim) -> Tensor(a)",
};
// clang-format on

if (get_tensorexpr_elementwise_set().contains(node) ||
node->isMemberOf(supported_non_eltwise_set()) ||
@@ -901,6 +903,7 @@ class TensorExprFuser {
static const OperatorSet pow{
"aten::pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor",
};
// clang-format on

// Check types of input values.
for (const Value* v : node->inputs()) {
@@ -167,7 +167,7 @@ static std::optional<std::vector<Value*>> build_script_grad(
auto grad_inputs = insertGraph(*graph, *bw_graph, grad);
grad_inputs = unpackOutputs(grad_inputs);
return grad_inputs;
}
};

namespace {
class GradientHelper {
@@ -56,9 +56,10 @@
C10_DEFINE_bool(
torch_jit_execution_plan_reuse_code_graph,
false,
"Directly reuse the preprocessed graph in the CodeImpl to reduce the memory consumption. This is aggressive memory saving, and please be cautious!")
"Directly reuse the preprocessed graph in the CodeImpl to reduce the memory consumption. This is aggressive memory saving, and please be cautious!");

namespace torch::jit {

EnableProfilingGuard::EnableProfilingGuard() {
auto& executor_mode = getExecutorMode();
old_executor_mode = executor_mode;
@@ -431,8 +432,8 @@ struct DifferentiableGraphOp {

{
auto inputs = last(stack, num_inputs);
// hook up the outputs of df to the gradient functions of the inputs
// that require gradients
// hook up the outputs of df to the gradient functions of the inputs that
// require gradients
for (auto idx : grad.df_output_vjps) {
grad_fn->addOutputForIValue(inputs[idx]);
}
@@ -454,8 +455,8 @@ struct DifferentiableGraphOp {
// TODO - XXX - if any output is the same tensor multiple times, views
// have to be setup here. We need to refactor autograd until it is safe
// for tensors to be constructed without all the viewing infrastructure.
// this is currently intentionally not done here so we can get an idea
// of our perf before introducing overhead for correctness
// this is currently intentionally not done here so we can get an idea of
// our perf before introducing overhead for correctness
for (auto idx : grad.df_input_vjps) {
grad_fn->addInputIValue(outputs[idx]);
}
@@ -500,8 +501,7 @@ struct DifferentiableGraphOp {
detach(stack[i]);
}
}
// Capture (save) inputs that would be required to subsequently run
// backwards
// Capture (save) inputs that would be required to subsequently run backwards
void captureInputs(
DifferentiableGraphBackward& grad_fn,
at::ArrayRef<IValue> inputs) const {
@@ -736,10 +736,8 @@ struct GraphExecutorImpl : public GraphExecutorImplBase {
runOptimization(opt_graph);

// Phase 4. If this graph will be differentiated, we need to slice out the
// symbolically differentiable subgraphs for further
// optimizations.
// Phase 5. Apply non-differentiable optimizations to the graphs we've
// found
// symbolically differentiable subgraphs for further optimizations.
// Phase 5. Apply non-differentiable optimizations to the graphs we've found
// (or the whole graph if we know we won't need its derivative).
if (needsGradient(opt_graph)) {
auto diff_nodes = CreateAutodiffSubgraphs(
@@ -783,8 +781,8 @@ struct GraphExecutorImpl : public GraphExecutorImplBase {

// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
ArgumentSpecCreator arg_spec_creator_;
// Populated only when optimize is false (and in that case plan_cache will
// be unused). The compiled version of graph.
// Populated only when optimize is false (and in that case plan_cache will be
// unused). The compiled version of graph.
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
ExecutionPlan fallback;

@@ -49,12 +49,12 @@ using torch::distributed::autograd::DistAutogradContainer;
C10_DEFINE_bool(
torch_jit_enable_rethrow_caught_exception,
false,
"enable rethrowing caught exception")
"enable rethrowing caught exception");

C10_DEFINE_bool(
torch_jit_enable_expanded_stacks,
false,
"When true we will attemps to pre-expand node stacks and cache expanded stacks.")
"When true we will attemps to pre-expand node stacks and cache expanded stacks.");

namespace torch::jit {

@@ -41,32 +41,32 @@
C10_DEFINE_bool(
torch_jit_enable_new_executor,
true,
"If this flag is set to false TorchScript will be using the legacy/original executor")
"If this flag is set to false TorchScript will be using the legacy/original executor");

C10_DEFINE_bool(
torch_jit_disable_warning_prints,
false,
"Disables warning.warn prints in TorchScript graph")
"Disables warning.warn prints in TorchScript graph");

C10_DEFINE_bool(
torch_jit_static_then_dynamic,
false,
"fuse on two static compilations then 10 dynamic")
"fuse on two static compilations then 10 dynamic");

C10_DEFINE_bool(
torch_jit_always_dynamic,
false,
"fuse on 12 dynamic compilations")
"fuse on 12 dynamic compilations");

C10_DEFINE_bool(
torch_jit_release_profiling_graph_after_optimization,
false,
"After getOptimizedPlanFor release the optimization record for reduction of memory in inference. This is aggressive memory saving, and please be cautious!")
"After getOptimizedPlanFor release the optimization record for reduction of memory in inference. This is aggressive memory saving, and please be cautious!");

C10_DEFINE_int32(
torch_jit_release_profiling_graph_delay_in_seconds,
60,
"How long to wait before releasing the profiling graph after optimizaiton is done. Only used if torch_jit_release_profiling_graph_after_optimization is set to true.")
"How long to wait before releasing the profiling graph after optimizaiton is done. Only used if torch_jit_release_profiling_graph_after_optimization is set to true.");

constexpr size_t kDefaultNumProfiledRuns = 1;
constexpr size_t kDefaultBailoutDepth = 20;
@@ -74,11 +74,11 @@ constexpr size_t kDefaultBailoutDepth = 20;
C10_DEFINE_int64(
torch_jit_num_profiled_runs,
kDefaultNumProfiledRuns,
"Number of profiling runs")
"Number of profiling runs");
C10_DEFINE_int64(
torch_jit_bailout_depth,
kDefaultBailoutDepth,
"Number of re-specializations")
"Number of re-specializations");

namespace torch::jit {

File diff suppressed because it is too large
@@ -50,7 +50,7 @@
C10_DEFINE_bool(
static_runtime_disable_debug_memory_overlap_check,
false,
"If true, disable the memory overlap check in debug mode in ProcessedNode::run()")
"If true, disable the memory overlap check in debug mode in ProcessedNode::run()");

namespace torch::jit {

@ -72,7 +72,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
// put output back
|
||||
p_node->Output(0) = std::move(stack[0]);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::TupleUnpack,
|
||||
@ -91,7 +91,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i) = elems[i];
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::DictConstruct,
|
||||
@ -116,7 +116,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
p_node->Output(0) = result;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// See [Borrowed IValue Outputs]
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
@ -139,7 +139,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i - 1) = createBorrowedIValue(value->value());
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::__getitem__, aten_getitem, [](Node* n) -> SROperator {
|
||||
if (!sr_schema_check(
|
||||
@ -177,7 +177,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::__getitem__, aten_getitem, [](Node* n) ->
|
||||
|
||||
// TODO(T98581096): make __getitem__ work for other container types
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::ListConstruct,
|
||||
@ -197,7 +197,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
// put output back
|
||||
p_node->Output(0) = std::move(stack[0]);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::ListUnpack,
|
||||
@ -219,7 +219,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i) = list[i];
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::append,
|
||||
@ -233,7 +233,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
auto list = p_node->Input(0).toList();
|
||||
list.push_back(p_node->Input(1));
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::list,
|
||||
@ -260,7 +260,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::numel,
|
||||
@ -273,7 +273,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& arg = p_node->Input(0).toTensor();
|
||||
p_node->Output(0) = arg.numel();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::cpu,
|
||||
@ -286,7 +286,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& arg = p_node->Input(0).toTensor();
|
||||
p_node->Output(0) = arg.cpu();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::__range_length,
|
||||
@ -312,7 +312,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(0) = 0;
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::index_put, aten_index_put, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -332,7 +332,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::index_put, aten_index_put, [](Node* n) ->
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::item,
|
||||
@ -345,7 +345,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& self = p_node->Input(0).toTensor();
|
||||
p_node->Output(0) = at::native::item(self);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::GetAttr,
|
||||
@ -362,7 +362,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto slot = type.getAttributeSlot(field);
|
||||
p_node->Output(0) = module.getSlot(slot);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::SetAttr,
|
||||
@ -379,7 +379,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto slot = type.getAttributeSlot(field);
|
||||
module.setSlot(slot, p_node->Input(1));
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::transpose,
|
||||
@ -396,7 +396,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto in2_i = p_node->Input(2).toInt();
|
||||
p_node->Output(0) = at::native::transpose(in0_t, in1_i, in2_i);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::flatten, aten_flatten, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -410,7 +410,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::flatten, aten_flatten, [](Node* n) -> SRO
|
||||
const auto in2_i = p_node->Input(2).toInt();
|
||||
p_node->Output(0) = at::native::flatten(in0_t, in1_i, in2_i);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::permute,
|
||||
@ -426,7 +426,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto in1_iv = p_node->Input(1).toDimVector();
|
||||
p_node->Output(0) = at::native::permute(in0_t, in1_iv);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::reshape,
|
||||
@ -442,7 +442,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto in1_iv = p_node->Input(1).toDimVector();
|
||||
p_node->Output(0) = at::native::reshape(in0_t, in1_iv);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::slice, aten_slice, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -458,7 +458,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::slice, aten_slice, [](Node* n) -> SROpera
|
||||
const auto in4_i = p_node->Input(4).toInt();
|
||||
p_node->Output(0) = at::native::slice(in0_t, in1_i, in2_i, in3_i, in4_i);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::narrow, aten_narrow, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -497,7 +497,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::narrow, aten_narrow, [](Node* n) -> SROpe
|
||||
").");
|
||||
p_node->Output(0) = at::native::slice(self, dim, start, start + length, 1);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::to, aten_to, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -544,7 +544,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::to, aten_to, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::detach,
|
||||
@ -559,7 +559,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& in0_t = p_node->Input(0).toTensor();
|
||||
p_node->Output(0) = at::native::alias(in0_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::expand_as,
|
||||
@ -575,7 +575,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& other = p_node->Input(1).toTensor();
|
||||
p_node->Output(0) = self.expand(other.sizes());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::isinstance,
|
||||
@ -600,7 +600,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
|
||||
p_node->Output(0) = false;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::TypeCheck,
|
||||
@ -633,7 +633,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
|
||||
p_node->Output(num_inputs) = true;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// See [Borrowed IValue Outputs]
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
@ -653,7 +653,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::view,
|
||||
@ -669,7 +669,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto size = p_node->Input(1).toIntList();
|
||||
p_node->Output(0) = at::native::view(input, size.vec());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::size,
|
||||
@ -696,7 +696,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::squeeze,
|
||||
@ -713,7 +713,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto dim = p_node->Input(1).toInt();
|
||||
p_node->Output(0) = at::native::squeeze(self, dim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::split, aten_split, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -739,7 +739,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::split, aten_split, [](Node* n) -> SROpera
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::split_with_sizes,
|
||||
@ -759,7 +759,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(0) =
|
||||
at::native::split_with_sizes(self, split_sizes.vec(), dim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
static_runtime::select_tensor,
|
||||
@ -788,7 +788,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
IValue(c10::MaybeOwnedTraits<at::TensorBase>::createBorrow(
|
||||
assignFrom.toTensor()));
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::mul,
|
||||
@ -814,7 +814,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
pnode->Output(0) = ret;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::sub,
|
||||
@ -829,7 +829,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto b = pnode->Input(1).toInt();
|
||||
pnode->Output(0) = a - b;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::add,
|
||||
@ -855,7 +855,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::tensor_split, aten_tensor_split, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -889,7 +889,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::tensor_split, aten_tensor_split, [](Node*
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::Int,
|
||||
@ -903,7 +903,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = at::native::item(input).toInt();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// See [Create owned refs for special values]
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
@ -915,7 +915,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
return
|
||||
[](ProcessedNode* p_node) { p_node->Output(0) = p_node->Input(0); };
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
bool outputsEmpty(const Block* block) {
|
||||
@ -1020,7 +1020,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
return [](ProcessedNode*) {};
|
||||
}
|
||||
return [](ProcessedNode*) {};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -1147,7 +1147,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
smodule, args, future, *launcher);
|
||||
(*launcher)(std::move(runtime_launcher));
|
||||
};
|
||||
})
|
||||
});
|
||||
/*
|
||||
aten::wait waits on the future (present in corresponding fork)
|
||||
to be executed. Once the execution is complete, the future is marked
|
||||
@ -1181,7 +1181,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i) = elems[i];
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::Loop,
|
||||
@ -1225,7 +1225,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i) = std::move(args[i + 1]);
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::CreateObject,
|
||||
@ -1240,7 +1240,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
c10::StrongTypePtr(class_type->compilation_unit(), class_type),
|
||||
class_type->numAttributes());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::TupleIndex,
|
||||
@ -1262,7 +1262,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
pnode->Output(0) = elems[norm_idx];
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::RaiseException,
|
||||
@ -1275,7 +1275,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& message = pnode->Input(0).toStringRef();
|
||||
throw std::runtime_error(message);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::Uninitialized,
|
||||
@ -1287,7 +1287,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
return [](ProcessedNode* pnode) {
|
||||
pnode->Output(0) = IValue::uninitialized();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::format,
|
||||
@ -1304,7 +1304,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
TORCH_DCHECK_EQ(stack.size(), 1);
|
||||
pnode->Output(0) = std::move(stack[0]);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::device,
|
||||
@ -1317,7 +1317,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = input.device();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::dtype,
|
||||
@ -1330,7 +1330,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = static_cast<int64_t>(input.scalar_type());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::dim,
|
||||
@ -1343,7 +1343,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = input.dim();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::__not__,
|
||||
@ -1356,7 +1356,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
auto input = pnode->Input(0).toBool();
|
||||
pnode->Output(0) = !input;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::Bool,
|
||||
@ -1382,7 +1382,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::is_cuda,
|
||||
@ -1395,7 +1395,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = input.is_cuda();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::tolist,
|
||||
@ -1413,7 +1413,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
TORCH_DCHECK_EQ(stack.size(), 1);
|
||||
pnode->Output(0) = std::move(stack[0]);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// See [Borrowed IValue Outputs]
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
@ -1428,7 +1428,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
pnode->Output(0) = condition ? createBorrowedIValue(pnode->Input(1))
|
||||
: createBorrowedIValue(pnode->Input(2));
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::len,
|
||||
@ -1474,7 +1474,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::IntImplicit,
|
||||
@ -1500,7 +1500,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
pnode->Output(0) = at::native::item(tensor).toInt();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::select,
|
||||
@ -1517,7 +1517,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto index = pnode->Input(2).toInt();
|
||||
pnode->Output(0) = at::native::select(self, dim, index);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::reshape_as,
|
||||
@ -1533,6 +1533,6 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& other = pnode->Input(1).toTensor();
|
||||
pnode->Output(0) = at::native::reshape(self, other.sizes());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
} // namespace torch::jit
|
||||
|
@@ -37,6 +37,8 @@
#include <torch/csrc/jit/tensorexpr/llvm_codegen.h>
#include <torch/csrc/jit/tensorexpr/loopnest.h>
#include <iterator>
#include <mutex>
#include <unordered_map>

#include <ATen/CompositeExplicitAutogradFunctions.h>

@@ -44,9 +46,10 @@ C10_DEFINE_bool(
static_runtime_enable_fast_math,
true,
"If on, static runtime may use use optimizations that cause accuracy loss "
"vs the jit interpreter")
"vs the jit interpreter");

namespace at::native {

static void repeat_out(
at::Tensor& result,
const Tensor& self,
@@ -137,9 +140,9 @@ static at::Tensor& flatten_copy_out(

// We don't want to infer_size on the entire shape, because that can give us
// an extra degree of freedom we don't want; for example, consider shape [0,
// 1, 3, 0], with start_dim=1, end_dim=2. It's clear we want result shape
// [0, 3, 0] but passing [0, -1, 0] to infer_size means the -1 can take on
// any value and satisfy the constraints.
// 1, 3, 0], with start_dim=1, end_dim=2. It's clear we want result shape [0,
// 3, 0] but passing [0, -1, 0] to infer_size means the -1 can take on any
// value and satisfy the constraints.
auto iter = self.sizes().data();
auto slice_numel = std::accumulate(
iter + start_dim,
@@ -323,8 +326,8 @@ static Tensor& c2_argmin_out(
return true;
}
// if a is not nan and b is nan, then a is not less than b
// with LessOrNan semantics otherwise, act normally. If `b`
// is NaN then a < b will always return false, so this is
// with LessOrNan semantics otherwise, act normally. If `b` is
// NaN then a < b will always return false, so this is
// equivalent to the first snippet.
return a < b;
});
@@ -375,7 +378,7 @@ static at::Tensor& dequantize_copy_out(Tensor& out, const Tensor& self) {

namespace torch::jit {

C10_DEFINE_REGISTRY(SROperatorRegistry, SROperatorFunctor)
C10_DEFINE_REGISTRY(SROperatorRegistry, SROperatorFunctor);

bool opIsRegistered(const c10::Symbol& op_name) {
const std::string name(op_name.toQualString());
@ -502,7 +505,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
listConstructSlowPath(type, size, p_node);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
static void tupleConstructSlowPath(const size_t size, ProcessedNode* p_node) {
|
||||
// prepare inputs
|
||||
@ -554,7 +557,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
tupleConstructSlowPath(size, p_node);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::abs, aten_abs, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::abs(Tensor self) -> Tensor"))) {
|
||||
@ -571,7 +574,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::abs, aten_abs, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::native::abs_out(in0_t, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::mul, aten_mul, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -591,7 +594,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::mul, aten_mul, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::mul_out(out_t, in0_t, in1_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::addmm, aten_addmm, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -613,7 +616,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::addmm, aten_addmm, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::addmm_out(out_t, in0_t, in1_t, in2_t, in3_s, in4_s);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
// Disable externally to avoid MSVC errors in open-source CI
|
||||
@ -673,9 +676,9 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
&clamp_min,
|
||||
&clamp_max,
|
||||
&nan,
|
||||
&output_size})
|
||||
&output_size});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
#endif
|
||||
|
||||
@ -720,7 +723,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::clamp, aten_clamp, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::bmm, aten_bmm, [](Node* n) -> SROperator {
|
||||
if (!n->matches(
|
||||
@ -738,7 +741,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::bmm, aten_bmm, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::bmm_out(out_t, in0_t, in1_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::nan_to_num, aten_nan_to_num, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -759,7 +762,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::nan_to_num, aten_nan_to_num, [](Node* n) -> SROp
|
||||
fastResizeToZero(out_t);
|
||||
at::native::nan_to_num_out(in0_t, in1_d, in2_d, in3_d, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -892,7 +895,7 @@ static SROperator aten_stack(Node* n) {
|
||||
};
|
||||
}
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::stack, aten_stack, aten_stack)
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::stack, aten_stack, aten_stack);
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
prim::VarStack,
|
||||
@ -910,7 +913,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
varStackOut(*p_node, dim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::leaky_relu, aten_leaky_relu, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -928,7 +931,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::leaky_relu, aten_leaky_relu, [](Node* n) -> SROp
|
||||
auto& out_t = p_node->Output(0).toTensor();
|
||||
at::cpu::leaky_relu_out(out_t, in0_t, in1_s);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::relu, aten_relu, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::relu(Tensor self) -> Tensor"))) {
|
||||
@ -951,7 +954,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::relu, aten_relu, [](Node* n) -> SROperator {
|
||||
int64_t nn = in0_t.numel();
|
||||
te->call({out_t.data_ptr(), in0_t.data_ptr(), &nn});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::tanh, aten_tanh, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::tanh(Tensor self) -> Tensor"))) {
|
||||
@ -974,7 +977,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::tanh, aten_tanh, [](Node* n) -> SROperator {
|
||||
int64_t nn = in0_t.numel();
|
||||
te->call({out_t.data_ptr(), in0_t.data_ptr(), &nn});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
prim::TensorExprDynamicGroup,
|
||||
@ -1009,7 +1012,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
aten::sigmoid,
|
||||
@ -1035,7 +1038,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
int64_t nn = in0_t.numel();
|
||||
te->call({out_t.data_ptr(), in0_t.data_ptr(), &nn});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::logit, aten_logit, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -1070,7 +1073,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::logit, aten_logit, [](Node* n) -> SROperator {
|
||||
float c = clamp_value;
|
||||
te->call({out_t.data_ptr(), in0_t.data_ptr(), &nn, &c});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::clone, aten_clone, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -1111,7 +1114,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::clone, aten_clone, [](Node* n) -> SROperator {
|
||||
out_t.unsafeGetTensorImpl(), src.sizes(), src.strides());
|
||||
at::native::copy_(out_t, src, false);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::embedding_bag_byte_rowwise_offsets,
|
||||
@ -1149,7 +1152,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
compressed_indices_mapping,
|
||||
include_last_offset);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::embedding_bag_4bit_rowwise_offsets,
|
||||
@ -1187,7 +1190,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
compressed_indices_mapping,
|
||||
include_last_offset);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::embedding_bag_byte_prepack,
|
||||
@ -1208,7 +1211,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out_t);
|
||||
at::native::qembeddingbag_byte_prepack_out(out_t, weight);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// The out variant takes precedence over native
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::narrow_copy, aten_narrow_copy, [](Node* n) -> SROperator {
|
||||
@ -1238,7 +1241,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::narrow_copy, aten_narrow_copy, [](Node* n) -> SR
|
||||
fastResizeToZero(output);
|
||||
at::native::narrow_copy_dense_cpu_out(self, dim, start, length, output);
|
||||
};
|
||||
})
|
||||
});
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::index, aten_index, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
"aten::index.Tensor(Tensor self, Tensor?[] indices) -> Tensor"))) {
|
||||
@ -1257,7 +1260,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::index, aten_index, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::index_out(out_t, in0_t, in1_l);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
aten::index_select,
|
||||
@ -1280,7 +1283,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out);
|
||||
at::native::index_select_out_cpu_(self, dim, index, out);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::pow, aten_pow, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -1342,7 +1345,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::pow, aten_pow, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -1620,7 +1623,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
return to_maybe_copy_out_functor<false, false>;
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
// out variant takes precedence over native
|
||||
// NB: This impl doesn't work for cpu->cuda copy/cast or vice versa.
|
||||
@ -1643,7 +1646,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
const bool has_memory_format = n->inputs().size() == 5;
|
||||
return get_to_copy_functor(
|
||||
has_constant_non_tensor_dtype_and_flags, has_memory_format);
|
||||
})
|
||||
});
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
@ -1668,7 +1671,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out_t);
|
||||
at::native::dequantize_copy_out(out_t, self);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// Out variants for view ops are registered to a separate registry because
|
||||
// their outputs (views) can't participate in memory reuse.
|
||||
@ -1692,7 +1695,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
auto& out = p_node->Output(0).toTensor();
|
||||
at::native::reshape_copy_out(out, self, proposed_shape, true);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
static_runtime::flatten_copy,
|
||||
@ -1715,7 +1718,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
auto& out = p_node->Output(0).toTensor();
|
||||
at::native::flatten_copy_out(out, self, start_dim, end_dim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::sum, aten_sum, [](Node* n) -> SROperator {
|
||||
if (n->inputs().size() != 2 && n->inputs().size() != 4) {
|
||||
@ -1755,7 +1758,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::sum, aten_sum, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::mean, aten_mean, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -1792,7 +1795,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::mean, aten_mean, [](Node* n) -> SROperator {
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::repeat, aten_repeat, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -1811,7 +1814,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::repeat, aten_repeat, [](Node* n) -> SROperator {
|
||||
at::Tensor& output = p_node->Output(0).toTensor();
|
||||
at::native::repeat_out(output, self, repeats);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::max, aten_max, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -1866,7 +1869,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::max, aten_max, [](Node* n) -> SROperator {
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::sign, aten_sign, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::sign.Tensor(Tensor input) -> Tensor"))) {
|
||||
@ -1883,7 +1886,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::sign, aten_sign, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::sign_out(out_t, in0_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::div, aten_div, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -1941,7 +1944,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::div, aten_div, [](Node* n) -> SROperator {
|
||||
at::cpu::div_out(out_t, in0_t, in1_t, rounding_mode);
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::log, aten_log, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::log.Tensor(Tensor input) -> Tensor"))) {
|
||||
@ -1958,7 +1961,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::log, aten_log, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::log_out(out_t, in0_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::sub, aten_sub, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -1994,7 +1997,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::sub, aten_sub, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
// TODO: support clamp_min.Tensor(Tensor self, Tensor min) -> Tensor
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
@ -2017,7 +2020,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::clamp_min_out(out_t, in0_t, in1_s);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::argmin, aten_argmin, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2041,7 +2044,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::argmin, aten_argmin, [](Node* n) -> SROperator {
|
||||
}
|
||||
at::cpu::argmin_out(out_t, in0_t, dim, keepdim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::softmax, aten_softmax, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2063,7 +2066,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::softmax, aten_softmax, [](Node* n) -> SROperator
|
||||
dtype == at::ScalarType::Float;
|
||||
at::cpu::_softmax_out(out_t, in_t, dim, half_to_float);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -2119,7 +2122,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::layer_norm, aten_layer_norm, [](Node* n) -> SROp
|
||||
at::Tensor& output = p_node->Output(0).toTensor();
|
||||
at::native::layer_norm_cpu_out(output, *X, *gamma, *beta, eps, M, N);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::norm, aten_norm, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -2184,7 +2187,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::norm, aten_norm, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::matmul, aten_matmul, [](Node* n) -> SROperator {
|
||||
if (!n->matches(
|
||||
@ -2204,7 +2207,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::matmul, aten_matmul, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::native::matmul_out(in0_t, in1_t, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(quantized::linear, quantized_linear, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2246,7 +2249,7 @@ REGISTER_OPERATOR_FUNCTOR(quantized::linear, quantized_linear, [](Node* n) -> SR
|
||||
input, output_scale, output_zero_point, out_t);
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
fb::quantized_linear,
|
||||
@ -2293,7 +2296,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
input, output_scale, output_zero_point, out_t);
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -2373,7 +2376,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
return nullptr;
|
||||
}
|
||||
return quantized_linear_dynamic_fp16_impl<false>(n);
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::linear_relu_dynamic_fp16,
|
||||
@ -2386,7 +2389,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
return nullptr;
|
||||
}
|
||||
return quantized_linear_dynamic_fp16_impl<true>(n);
|
||||
})
|
||||
});
|
||||
|
||||
// device & pin_memory matter only when CUDA is enabled.
|
||||
static bool hasTensorWithOptions(
|
||||
@ -2435,7 +2438,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::full, aten_full, [](Node* n) -> SROperator {
|
||||
p_node->Output(0) =
|
||||
at::native::full_out(size, fill_value, p_node->Output(0).toTensor());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::full_like, aten_full_like, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2461,7 +2464,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::full_like, aten_full_like, [](Node* n) -> SROper
|
||||
at::native::resize_(out_t, in0_t.sizes(), std::nullopt);
|
||||
at::native::fill_out(out_t, in1_s);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::ones, aten_ones, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2484,7 +2487,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::ones, aten_ones, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::native::ones_out(size, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::ones_like, aten_ones_like, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2509,7 +2512,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::ones_like, aten_ones_like, [](Node* n) -> SROper
|
||||
fastResizeToZero(out_t);
|
||||
at::native::ones_out(self.sizes(), out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::zeros, aten_zeros, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2530,7 +2533,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::zeros, aten_zeros, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::compositeexplicitautograd::zeros_out(out_t, size);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::linear, aten_linear, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2552,7 +2555,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::linear, aten_linear, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::native::linear_out(out_t, in0_t, in1_t, in2_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::linalg_norm, aten_linalg_norm, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -2602,7 +2605,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::linalg_norm, aten_linalg_norm, [](Node* n) -> SR
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::cat, aten_cat, [](Node* n) -> SROperator {
|
||||
if (!n->matches(
|
||||
@ -2622,7 +2625,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::cat, aten_cat, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(output);
|
||||
at::cpu::cat_outf(inputs, dim, output);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::cumsum, aten_cumsum, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2642,7 +2645,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::cumsum, aten_cumsum, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(output);
|
||||
at::cpu::cumsum_out(output, input, dim, dtype);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
aten::nonzero,
|
||||
@ -2662,7 +2665,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(output);
|
||||
at::native::nonzero_out_cpu(input, output);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
@ -2687,7 +2690,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::cat_outf(inputs, dim, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
// This template and its specialization help us avoid compiler warnings
|
||||
@ -2749,7 +2752,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
int64_t nn = input.numel();
|
||||
te->call({out.data_ptr(), input.data_ptr(), &nn});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
aten::remainder,
|
||||
@ -2787,7 +2790,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
// Unrecognized overload
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::where, aten_where, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -2808,7 +2811,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::where, aten_where, [](Node* n) -> SROperator {
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
prim::NumToTensor,
|
||||
@ -2830,7 +2833,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::embedding_bag_byte_unpack,
|
||||
@ -2852,6 +2855,6 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
auto& out = pnode->Output(0).toTensor();
|
||||
at::native::qembeddingbag_byte_unpack_out(out, weight);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
} // namespace torch::jit
|
||||
|
@@ -12,9 +12,10 @@
C10_DEFINE_bool(
enable_clip_ranges_gather_fusions,
true,
"If on, static runtime or optimize_sparse_nn_model will fuse clip ranges gather ops.")
"If on, static runtime or optimize_sparse_nn_model will fuse clip ranges gather ops.");

namespace torch::jit {

bool graphHasOp(std::shared_ptr<Graph>& graph, const char* op_name) {
DepthFirstGraphNodeIterator graph_it(graph);
for (auto node = graph_it.next(); node != nullptr; node = graph_it.next()) {
@@ -714,8 +715,8 @@ static void ReplaceWithCopyImpl(
// b and c are aliases of a, sigmoid_ changes b, c, as well as a. e should
// equal to d in this case. If we replace reshape with the copy version, b
// and c are no longer aliases of a, the value of e would change as a
// result. To keep static runtime consistent with the jit interpreter,
// here we choose not to replace reshape with the copy version
// result. To keep static runtime consistent with the jit interpreter, here
// we choose not to replace reshape with the copy version
if (db.hasInputWriters(n)) {
continue;
}
@@ -1085,8 +1086,8 @@ void ForceNonEmptyOutputsHelper(Value* none_value, Block* block) {
}

if (needs_output) {
// Loop sub-blocks should always return at least one output (the new
// loop condition)
// Loop sub-blocks should always return at least one output (the new loop
// condition)
DCHECK(node->kind() == prim::If);
auto* output = node->addOutput();
output->setType(c10::NoneType::get());
@@ -1339,8 +1340,8 @@ bool isNoOpSlice(Node* node) {
return false;
}
auto end = toIValue(node->input(2));
// Could also look at list length, but most models that have this pattern
// are just doing list[0:], so it's not needed for now.
// Could also look at list length, but most models that have this pattern are
// just doing list[0:], so it's not needed for now.
return end.has_value() && end->isNone();
}
} // namespace
@@ -538,7 +538,7 @@ struct FileCheckImpl {
std::vector<std::vector<Check>> groups;
};

FileCheck::FileCheck() : fcImpl(new FileCheckImpl()) {}
FileCheck::FileCheck() : fcImpl(new FileCheckImpl()){};

std::ostream& operator<<(std::ostream& out, const FileCheckImpl& fc) {
out << "FileCheck checks:\n";
@@ -546,7 +546,7 @@ std::ostream& operator<<(std::ostream& out, const FileCheckImpl& fc) {
out << "\t" << c << "\n";
}
return out;
}
};

FileCheck::~FileCheck() {
if (!fcImpl->has_run) {
@@ -554,17 +554,17 @@ FileCheck::~FileCheck() {
std::cout << *fcImpl;
}
fcImpl.reset();
}
};

void FileCheck::run(const std::string& test_file) {
fcImpl->run(test_file);
}
};

void FileCheck::run(const Graph& graph) {
std::stringstream graph_str;
graph_str << graph;
fcImpl->run(graph_str.str());
}
};

void FileCheck::run(
const std::string& input_checks_string,
@@ -6,74 +6,74 @@ C10_DEFINE_bool(torch_lazy_ir_debug, false, "Enable lazy tensor IR debugging");
C10_DEFINE_bool(
torch_lazy_param_aliasing,
true,
"Enable parameter aliasing support")
"Enable parameter aliasing support");

C10_DEFINE_bool(
torch_lazy_handle_special_scalars,
false,
"Handle special scalars 0 and 1 differently")
"Handle special scalars 0 and 1 differently");

C10_DEFINE_bool(
torch_lazy_all_numbers_special_scalars,
false,
"Handle all numbers as special scalars")
"Handle all numbers as special scalars");

C10_DEFINE_bool(
torch_lazy_reuse_ir,
false,
"Reuse IR nodes from previous tracing when possible")
"Reuse IR nodes from previous tracing when possible");

C10_DEFINE_bool(
torch_lazy_use_thread_pool,
false,
"Use thread pool to schedule backend execution")
"Use thread pool to schedule backend execution");

C10_DEFINE_bool(
torch_lazy_enable_device_data_cache,
true,
"Enable or disable device data cache (turns cache on or off), does not change cache state")
"Enable or disable device data cache (turns cache on or off), does not change cache state");

C10_DEFINE_int(
torch_lazy_compilation_cache_size,
1024,
"Size of the compilation cache")
"Size of the compilation cache");

C10_DEFINE_int(
torch_lazy_device_data_cache_size,
128,
"Size of the DeviceData cache")
"Size of the DeviceData cache");

C10_DEFINE_int(
torch_lazy_io_thread_pool_size,
// TODO: measure which default value
// will give better performance,
// std::thread::hardware_concurrency()?
// TODO: measure which default value will give better
// performance, std::thread::hardware_concurrency()?
1,
"Size of the execution thread pool")
"Size of the execution thread pool");

C10_DEFINE_int(torch_lazy_metrics_samples, 1024, "Max metrics sample size")
C10_DEFINE_int(torch_lazy_metrics_samples, 1024, "Max metrics sample size");

C10_DEFINE_int(
torch_lazy_trim_graph_check_frequency,
5000,
"How often to check for whether a graph needs to be split")
"How often to check for whether a graph needs to be split");

C10_DEFINE_int(
torch_lazy_trim_graph_size,
100000,
"The threshold (in terms of the number of nodes) for splitting a graph")
"The threshold (in terms of the number of nodes) for splitting a graph");

C10_DEFINE_string(
torch_lazy_metrics_percentiles,
"0.01:0.05:0.1:0.2:0.5:0.8:0.9:0.95:0.99",
"Metrics percentiles to be collected, using : as the delimiter")
"Metrics percentiles to be collected, using : as the delimiter");

C10_DEFINE_int(
torch_lazy_shape_cache_size,
4096,
"Set the size for the shape cache used for shape inference")
"Set the size for the shape cache used for shape inference");

namespace torch::lazy {

std::string& getLTCForceFallback() {
static std::string config;
static bool _ignore = [&]() {
@@ -35,13 +35,13 @@ class TORCH_API DimensionNode {
public:
virtual bool isSymbolic() const {
return false;
}
};
virtual int64_t getDynamicValue() const {
TORCH_CHECK(false, "NYI");
}
};
virtual int64_t getStaticValue() const {
TORCH_CHECK(false, "NYI");
}
};
virtual ~DimensionNode() = default;
};

@@ -10,9 +10,10 @@
C10_DEFINE_bool(
ltc_enable_dynamic_shapes,
false,
"Whether dynamic shape is enabled")
"Whether dynamic shape is enabled");

namespace torch::lazy {

static const torch::lazy::Output kNullOutput = torch::lazy::Output();

size_t Output::Hasher::operator()(const Output& output) const {
@@ -30,7 +30,7 @@ SizeNode::SizeNode(Value input, size_t dim)
std::vector<Shape>{},
1,
MHash(dim)),
dim_(dim) {}
dim_(dim){};

int64_t SizeNode::getStaticValue() const {
return dynamic_cast<const TsNode*>(operand(0).node)
@@ -55,7 +55,7 @@ SizeAdd::SizeAdd(Value a, Value b)
OpKind{c10::Symbol::fromQualString("aten::add")},
{std::move(a), std::move(b)},
std::vector<Shape>{},
1) {}
1){};

int64_t SizeAdd::getStaticValue() const {
return DimCast(operand(0))->getStaticValue() +
@@ -75,7 +75,7 @@ SizeMul::SizeMul(Value a, Value b)
OpKind{c10::Symbol::fromQualString("aten::mul")},
{std::move(a), std::move(b)},
std::vector<Shape>{},
1) {}
1){};

int64_t SizeMul::getStaticValue() const {
return DimCast(operand(0))->getStaticValue() *
@@ -95,7 +95,7 @@ SizeDiv::SizeDiv(Value a, Value b)
OpKind{c10::Symbol::fromQualString("aten::div")},
{std::move(a), std::move(b)},
std::vector<Shape>{},
1) {}
1){};

int64_t SizeDiv::getStaticValue() const {
TORCH_CHECK(
@@ -268,7 +268,7 @@ at::Tensor LazyNativeFunctions::_to_copy(
std::move(node), lazy_self->GetDevice()));
return result;
}
}
};

at::Tensor LazyNativeFunctions::empty_symint(
at::SymIntArrayRef sym_size,
@@ -129,7 +129,7 @@ def gen_custom_ops_registration(
static_init_dispatch_registrations += f"""
TORCH_LIBRARY_IMPL({namespace}, {dispatch_key}, m) {{
{dispatch_registrations_body}
}}"""
}};"""
anonymous_definition = "\n".join(
list(
concatMap(

@@ -1615,7 +1615,7 @@ def get_native_function_definitions(
registration_body += f"""
TORCH_LIBRARY_IMPL({namespace}, {dispatch_key}, m) {{
{newline.join(registrations[kernel_namespace][namespace])}
}}"""
}};"""
definitions.extend(
fm.substitute_with_template(
"RegisterDispatchDefinitions.ini",

@@ -460,7 +460,7 @@ def gen_dispatcher_registrations(
"""\
TORCH_LIBRARY_IMPL(aten, $dispatch_key, m) {
$dispatch_registrations_body
}"""
};"""
)
static_init_dispatch_registrations = static_template.substitute(
dispatch_key=dispatch_key,