[1/N] Use internal linkage in torch/csrc C++ files. (#150930)

Turn more functions and variables into static if they are not used outside the cpp files. Unused functions are removed.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/150930
Approved by: https://github.com/Skylion007
Co-authored-by: Aaron Gokaslan <aaronGokaslan@gmail.com>
Committed by: PyTorch MergeBot
Commit: 24ca7e91e6 (parent: 48132de4af)
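For readers skimming the diff below, the change is mechanical. A minimal sketch of what internal linkage means in C++ (hypothetical file and names, not taken from this PR):

// example.cpp -- hypothetical translation unit, not part of this PR.
#include <string>

// Without 'static' this helper has external linkage: its symbol is visible to
// the linker from every translation unit and can collide with, or be silently
// picked up instead of, an unrelated definition elsewhere.
// With 'static' it has internal linkage: it is private to this .cpp file,
// which is what clang-tidy's misc-use-internal-linkage check (see the
// NOLINTNEXTLINE comments in the hunks below) asks for.
static std::string makeLabel(int id) {
  return "item-" + std::to_string(id);
}

// An unnamed namespace has the same effect and also covers types; file-scope
// variables can be marked 'static' in the same way as in this diff.
namespace {
constexpr int kDefaultId = 42;
} // namespace

int main() {
  return makeLabel(kDefaultId).empty() ? 1 : 0;
}
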
@@ -133,6 +133,10 @@
#include <callgrind.h>
#endif

#ifdef USE_ITT
#include <torch/csrc/itt.h>
#endif

namespace py = pybind11;

static PyObject* module;
@@ -1757,12 +1761,6 @@ void initModule(PyObject* module);
} // namespace torch::xpu
#endif

#ifdef USE_ITT
namespace torch::profiler {
void initIttBindings(PyObject* module);
} // namespace torch::profiler
#endif

static std::vector<PyMethodDef> methods;

// In Python we can't use the trick of C10_LOG_API_USAGE_ONCE
@@ -858,7 +858,7 @@ std::unique_ptr<ProfilerResult> disableProfiler() {
return result;
}
namespace tracer = torch::profiler::impl::python_tracer;
std::unique_ptr<tracer::PythonMemoryTracerBase> memory_tracer;
static std::unique_ptr<tracer::PythonMemoryTracerBase> memory_tracer;
void startMemoryProfile() {
if (memory_tracer == nullptr) {
memory_tracer = tracer::PythonMemoryTracerBase::make();
@@ -7,9 +7,6 @@

namespace c10d::intra_node_comm {

// NOLINTNEXTLINE(misc-use-internal-linkage)
bool isIntraNodeCommSupported();

static std::vector<std::string> ENABLE_INTRA_NODE_COMM = {
"ENABLE_INTRA_NODE_COMM"};
// Forces detectedTopology() to return Topology::FULLY_CONNECTED, so
@@ -87,4 +87,5 @@ class IntraNodeCommWork : public c10d::Work {

TORCH_API int64_t getIntraNodeCommUsageCounter();

bool isIntraNodeCommSupported();
} // namespace c10d::intra_node_comm
@@ -3,7 +3,7 @@

namespace torch::dynamo::autograd {

std::unique_ptr<PyCompilerInterface> kActivePyCompilerInterface;
static std::unique_ptr<PyCompilerInterface> kActivePyCompilerInterface;

const std::unique_ptr<PyCompilerInterface>& getPyCompilerInterface() {
TORCH_INTERNAL_ASSERT(kActivePyCompilerInterface != nullptr);
@@ -7,8 +7,8 @@
#include <torch/csrc/dynamo/framelocals_mapping.h>
#include <torch/csrc/utils/python_compat.h>

// NOLINTNEXTLINE(misc-use-internal-linkage)
const char* cache_lookup_profiler_str = "TorchDynamo Cache Lookup";
static constexpr const char* cache_lookup_profiler_str =
"TorchDynamo Cache Lookup";

// Remember to update the type signature for DynamoCallbackFn.__call__ in
// torch/_dynamo/types.py if this function's signature changes.
@@ -9,7 +9,7 @@ static std::array<PyMethodDef, 1> _methods = {{
nullptr} // Sentinel value indicating the end of the array
}};

bool is_instancemethod(py::object obj) {
static bool is_instancemethod(py::object obj) {
return PyInstanceMethod_Check(obj.ptr());
}

@@ -1,4 +1,5 @@
#include <torch/csrc/inductor/aoti_package/model_package_loader.h>
#include <torch/csrc/inductor/aoti_package/pybind.h>
#include <torch/csrc/inductor/aoti_runner/model_container_runner.h>
#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
#ifdef USE_CUDA
@@ -45,7 +46,6 @@ class AOTIModelPackageLoaderPybind : public AOTIModelPackageLoader {
}
};

// NOLINTNEXTLINE(misc-use-internal-linkage)
void initAOTIPackageBindings(PyObject* module) {
auto rootModule = py::handle(module).cast<py::module>();
auto m = rootModule.def_submodule("_aoti");
@@ -33,7 +33,9 @@ std::unique_ptr<AOTIModelContainerRunner> create_aoti_runner_cpu(
}
} // namespace

RegisterAOTIModelRunner register_cpu_runner("cpu", &create_aoti_runner_cpu);
static RegisterAOTIModelRunner register_cpu_runner(
"cpu",
&create_aoti_runner_cpu);

} // namespace torch::inductor
#endif
@@ -5,6 +5,7 @@
#ifdef USE_XPU
#include <torch/csrc/inductor/aoti_runner/model_container_runner_xpu.h>
#endif
#include <torch/csrc/inductor/aoti_runner/pybind.h>
#include <torch/csrc/inductor/aoti_torch/tensor_converter.h>
#include <torch/csrc/inductor/aoti_torch/utils.h>

@@ -17,7 +17,7 @@ using namespace torch::aot_inductor;
#if AT_MKLDNN_ENABLED()

template <typename T>
c10::List<T> convert_to_c10_List(const T* scalars, const int64_t len) {
static c10::List<T> convert_to_c10_List(const T* scalars, const int64_t len) {
c10::List<T> scalars_list;
scalars_list.reserve(len);
for (int64_t i = 0; i < len; i++) {
@@ -17,8 +17,7 @@

namespace torch::instruction_counter {

// NOLINTNEXTLINE(misc-use-internal-linkage)
long start() {
static long start() {
#if !defined(__linux__)
throw std::runtime_error("This systems seems not to be Linux");
#else
@@ -49,8 +48,7 @@ long start() {
#endif
}

// NOLINTNEXTLINE(misc-use-internal-linkage)
uint64_t end(int fd) {
static uint64_t end(int fd) {
#if !defined(__linux__)
throw std::runtime_error("This systems seems not to be Linux");
#else
@@ -1,8 +1,7 @@
#include <torch/csrc/itt.h>
#include <torch/csrc/itt_wrapper.h>
#include <torch/csrc/utils/pybind.h>

namespace torch::profiler {
// NOLINTNEXTLINE(misc-use-internal-linkage)
void initIttBindings(PyObject* module) {
auto m = py::handle(module).cast<py::module>();
torch/csrc/itt.h (new file, 8 lines)
@@ -0,0 +1,8 @@
#ifndef ITT_H
#define ITT_H
#include <torch/csrc/utils/pybind.h>

namespace torch::profiler {
void initIttBindings(PyObject* module);
} // namespace torch::profiler
#endif // ITT_H
@@ -148,7 +148,7 @@ Module::Module(
// as we bring up the system since it will degrade performance
// and may introduce bugs. test_jit.py provides context managers
// that enable it for specific tests.
thread_local bool inline_everything = false;
static thread_local bool inline_everything = false;
bool& getInlineEverythingMode() {
return inline_everything;
}
@@ -10,7 +10,7 @@
namespace torch::jit {

// Get all types that are shared in the module hierarchy rooted at \p mod.
std::unordered_set<TypePtr> getSharedModuleTypes(Module& mod) {
static std::unordered_set<TypePtr> getSharedModuleTypes(Module& mod) {
// Maintain a set of all TypePtrs.
std::unordered_set<TypePtr> types;
// Maintain another set of TypePtrs that have been encountered more than once.
@@ -32,7 +32,7 @@ std::unordered_set<TypePtr> getSharedModuleTypes(Module& mod) {
// Selectively lower \p mod to a backend. \p to_backend
// is called to lower modules. \p modules_to_lower contains
// qualified names of submodules of \p mod that should be lowered.
void toBackendSelectiveImpl(
static void toBackendSelectiveImpl(
Module& mod,
const py::function& to_backend,
const std::vector<std::string>& modules_to_lower,
@@ -118,7 +118,7 @@ void toBackendSelectiveImpl(
}
}

Module codegen_func(
static Module codegen_func(
const std::string& backend_name,
const Module& orig_module,
const py::dict& method_compile_spec) {
@@ -26,7 +26,7 @@ namespace py = pybind11;
// torch.tensor([[1.0, -1.0, 2.0, -2.0]]).unsqueeze(-1).unsqueeze(-1)
//
// In the future, preprocess will accept a dedicated object
c10::IValue preprocess(
static c10::IValue preprocess(
const torch::jit::Module& mod,
const c10::Dict<c10::IValue, c10::IValue>& method_compile_spec,
const torch::jit::BackendDebugHandleGenerator& generate_debug_handles) {
@@ -40,21 +40,6 @@ constexpr int so_suffix_len = 3;
constexpr int cpp_suffix_len = 4;
#endif

intptr_t run(const std::string& cmd);

static bool programExists(const std::string& program) {
std::stringstream ss;
c10::printQuotedString(ss, program);
at::jit::TemplateEnv env;
env.s("program", ss.str());
std::string cmd = format(check_exists_string, env);
#ifdef _MSC_VER
return (run(cmd.c_str()) == 0);
#else
return (system(cmd.c_str()) == 0);
#endif
}

#ifdef _MSC_VER
static std::optional<std::wstring> exec(const std::wstring& cmd) {
std::array<wchar_t, 128> buffer;
@@ -143,7 +128,7 @@ static void activate() {
}
}

intptr_t run(const std::string& cmd) {
static intptr_t run(const std::string& cmd) {
// Getting the path of `cmd.exe`
const wchar_t* comspec = _wgetenv(L"COMSPEC");
if (!comspec) {
@@ -168,6 +153,19 @@ intptr_t run(const std::string& cmd) {
}
#endif

static bool programExists(const std::string& program) {
std::stringstream ss;
c10::printQuotedString(ss, program);
at::jit::TemplateEnv env;
env.s("program", ss.str());
std::string cmd = format(check_exists_string, env);
#ifdef _MSC_VER
return (run(cmd.c_str()) == 0);
#else
return (system(cmd.c_str()) == 0);
#endif
}

// A single compiler config is accessed through getConfig() (below)
// Controls compilation options and may be updated based on the result
// of compilation attempts.
@@ -353,5 +351,5 @@ static std::shared_ptr<FusedKernel> createFusionKernel(
has_random);
}

RegisterFusionBackend reg(DeviceType::CPU, createFusionKernel);
static RegisterFusionBackend reg(DeviceType::CPU, createFusionKernel);
} // namespace torch::jit::fuser::cpu
@@ -19,7 +19,7 @@ c10::AliasAnalysisKind aliasAnalysisIsSpecialCase() {

// Registers fused operators so that fused graphs can properly generate fallback
// code.
RegisterOperators reg_fused_operators({Operator(
static RegisterOperators reg_fused_operators({Operator(
prim::FusedConcat,
[](const Node* node) -> Operation {
int64_t dim = node->i(attr::dim);
@@ -15,11 +15,11 @@ namespace detail {
#ifdef TORCH_ENABLE_LLVM
bool cpu_fuser_enabled = true;
#else
bool cpu_fuser_enabled = false;
static bool cpu_fuser_enabled = false;
#endif

// note: this doesn't necessarily enable NNC because NVFuser might override it
bool gpu_fuser_enabled = true;
static bool gpu_fuser_enabled = true;

} // namespace detail

@@ -104,7 +104,7 @@ static Operation createLlgaKernel(const Node* node) {
};
}

RegisterOperators oneDNNFusionGroupOp({
static RegisterOperators oneDNNFusionGroupOp({
torch::jit::Operator(
prim::oneDNNFusionGroup,
createLlgaKernel,
@@ -169,7 +169,7 @@ static Operation createLlgaGuardKernel(const Node* node) {
};
}

RegisterOperators oneDNNGuardOp({
static RegisterOperators oneDNNGuardOp({
torch::jit::Operator(
prim::oneDNNFusionGuard,
createLlgaGuardKernel,
@@ -7,7 +7,7 @@

namespace torch::jit {

auto scalar_operators_source = at::jit::CodeTemplate(
static auto scalar_operators_source = at::jit::CodeTemplate(
R"SCRIPT(
def mul(a : ${Scalar}, b : Tensor) -> Tensor:
return b * a
@@ -23,7 +23,7 @@ def div(a : ${Scalar}, b : Tensor) -> Tensor:
return torch.reciprocal(b) * a
)SCRIPT");

auto scalar_operators_no_complex_source = at::jit::CodeTemplate(
static auto scalar_operators_no_complex_source = at::jit::CodeTemplate(
R"SCRIPT(
def lt(a : ${Scalar}, b : Tensor) -> Tensor:
return b > a
@@ -35,19 +35,19 @@ def ge(a : ${Scalar}, b : Tensor) -> Tensor:
return b <= a
)SCRIPT");

auto _ntuple_ops = at::jit::CodeTemplate(
static auto _ntuple_ops = at::jit::CodeTemplate(
R"SCRIPT(
def _${name}(x: BroadcastingList${Length}[${Scalar}]) -> List[${Scalar}]:
return x
)SCRIPT");

auto floordiv = at::jit::CodeTemplate(
static auto floordiv = at::jit::CodeTemplate(
R"SCRIPT(
def floordiv(self : Tensor, other : ${Rhs_Type}) -> Tensor:
return torch.floor_divide(self, other)
)SCRIPT");

auto tensor_properties =
static auto tensor_properties =
R"SCRIPT(
def ndim(a : Tensor) -> int:
return a.dim()
@@ -67,7 +67,7 @@ def shape(a : Tensor) -> List[int]:
// aten::_assert_int_or_pair op which was removed once we were able to compile
// torch.nn.functional.assert_int_or_pair
// list_with_default also needs to be here for BC
auto aten_ops =
static auto aten_ops =
R"SCRIPT(
def _assert_int_or_pair(vals: List[int], name: str, message: str):
pass
@@ -6,7 +6,7 @@ namespace torch::jit {

// Avoid storing objects with destructor in thread_local for mobile build.
#ifndef C10_MOBILE
thread_local std::vector<Call> calls;
static thread_local std::vector<Call> calls;
#endif // C10_MOBILE

ErrorReport::ErrorReport(const ErrorReport& e)
@@ -624,11 +624,6 @@ static Value* materializeConstant(
return new_constant;
}

inline bool isSupportedListElementType(const TypePtr& type) {
return type->isSubtypeOf(*TensorType::get()) ||
type->isSubtypeOf(*NumberType::get());
}

// Information for each def being emitted.
// Defs can be nested to support closures so we need a stack of this information
// Currently records information about the functions return type.
@@ -34,14 +34,14 @@ namespace torch::jit::tracer {
namespace detail {

template <typename T>
void genericAddInput(Node* n, T value) {
static void genericAddInput(Node* n, T value) {
Value* v = n->owningGraph()->insertConstant(value);
recordSourceLocation(v->node());
n->addInput(v);
}

template <typename T>
void genericAddOptionalInput(
static void genericAddOptionalInput(
Node* n,
const char* name,
const std::optional<T>& value) {
@@ -55,7 +55,7 @@ void genericAddOptionalInput(
}

template <typename T>
void badArgType(const T& v) {
static void badArgType(const T& v) {
TORCH_CHECK(
false,
"Found an unsupported argument type in the JIT tracer: ",
@@ -63,7 +63,7 @@ void badArgType(const T& v) {
". File a bug report.");
}

thread_local std::shared_ptr<TracingState> tracing_state;
static thread_local std::shared_ptr<TracingState> tracing_state;
} // namespace detail

static std::atomic<bool> tracer_state_warn_mode{true};
@@ -1055,8 +1055,8 @@ void ArgumentStash::stashValue(
////////////////////////////////////////////////////////////////////////////////
// no python present so we just do not record source information
static void defaultRecordSourceLocation(Node* n) {}
std::atomic<decltype(&defaultRecordSourceLocation)> record_source_location(
defaultRecordSourceLocation);
static std::atomic<decltype(&defaultRecordSourceLocation)>
record_source_location(defaultRecordSourceLocation);
void recordSourceLocation(Node* n) {
return record_source_location.load()(n);
}
@@ -1067,7 +1067,7 @@ void setRecordSourceLocation(void (*v)(Node*)) {
static std::vector<StackEntry> defaultPythonCallstack() {
return std::vector<StackEntry>();
}
std::atomic<decltype(&defaultPythonCallstack)> python_callstack_fn(
static std::atomic<decltype(&defaultPythonCallstack)> python_callstack_fn(
defaultPythonCallstack);
std::vector<StackEntry> pythonCallstack() {
return python_callstack_fn.load()();
@@ -1079,7 +1079,7 @@ void setPythonCallstack(std::vector<StackEntry> (*v)()) {
static void defaultWarn(const std::string& str) {
TORCH_WARN(str);
}
std::atomic<warn_fn_type> warn_callback{defaultWarn};
static std::atomic<warn_fn_type> warn_callback{defaultWarn};

const char* WARN_PYTHON_DATAFLOW =
" might cause the trace to be incorrect. We can't record the data flow of "
@@ -100,7 +100,9 @@ void findAllNodes(
// NB: This overload will become ambiguous with the one Caffe2 provides in its
// logging, if they ever intersect.
template <typename T>
std::ostream& operator<<(std::ostream& out, const std::vector<T>& nodes) {
static std::ostream& operator<<(
std::ostream& out,
const std::vector<T>& nodes) {
out << at::ArrayRef<T>{nodes};
return out;
}
@@ -1671,7 +1673,7 @@ size_t Node::blocksFromGraphBlock() {
return dist;
}

inline const SourceRange& fakeRange() {
static inline const SourceRange& fakeRange() {
static SourceRange range(std::make_shared<Source>(std::string("")), 0, 1);
return range;
}
@@ -2038,7 +2040,7 @@ at::ArrayRef<Value*> createTupleUnpack(Value* v) {
return g.insertNode(g.createTupleUnpack(v))->outputs();
}

void inlineCallStackOfNode(
static void inlineCallStackOfNode(
Node* n,
std::unordered_map<InlinedCallStack*, InlinedCallStackPtr>& new_cs_entries,
Function* callee,
@@ -16,7 +16,7 @@ const static BackportManager backportManager;

// Forward declare so that _backport_for_mobile() overloads can
// call this method directly.
bool _backport_for_mobile_impl(
static bool _backport_for_mobile_impl(
std::istream& oss,
PyTorchStreamWriter& writer,
const int64_t to_version);
@@ -137,7 +137,7 @@ uint64_t _get_model_bytecode_version(

/********************** Operator Version **********************/

uint64_t _get_model_operator_version(
static uint64_t _get_model_operator_version(
PyTorchStreamReader& reader); // Forward Declare

uint64_t _get_model_operator_version(std::istream& in) {
@@ -168,7 +168,7 @@ uint64_t _get_model_operator_version(PyTorchStreamReader& reader) {
/********************** Operators and Info **********************/

// Forward declare
std::unordered_map<std::string, OperatorInfo> _get_model_ops_and_info(
static std::unordered_map<std::string, OperatorInfo> _get_model_ops_and_info(
std::vector<IValue> bytecode_ivalues);

std::unordered_map<std::string, OperatorInfo> _get_model_ops_and_info(
@@ -10,7 +10,6 @@

namespace torch::jit {

char const* toString(OpCode op);
namespace mobile {
Function::Function(c10::QualifiedName name) : name_(std::move(name)) {}

@@ -12,11 +12,11 @@
#include <torch/csrc/jit/mobile/function.h>
#include <torch/csrc/jit/mobile/observer.h>
#include <torch/csrc/jit/mobile/promoted_prim_ops.h>
#include <torch/csrc/jit/runtime/instruction.h>
#include <torch/csrc/jit/runtime/jit_exception.h>
#include <torch/csrc/jit/runtime/vararg_functions.h>

namespace torch::jit {
char const* toString(OpCode op);
std::ostream& operator<<(std::ostream& out, Instruction inst);
namespace mobile {
InterpreterState::InterpreterState(const Code& code) {
@@ -110,7 +110,7 @@ static bool shape_is_fast_for_reduce(
return m < 512 || ((l < 256 && r < 256) || (l > 256 && r > 256));
}

RegisterOperators mm_tree_reduction_reg({Operator(
static RegisterOperators mm_tree_reduction_reg({Operator(
"prim::MMTreeReduce(...) -> Tensor",
[](Stack& stack) {
auto num_inputs = pop(stack).toInt();
@@ -323,7 +323,7 @@ static bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
return other_side_input.numel() <= 1024 * 2048;
}

RegisterOperators mm_batch_side_reg({Operator(
static RegisterOperators mm_batch_side_reg({Operator(
prim::MMBatchSide,
[](const Node* node) -> Operation {
size_t num_other_side_inputs = node->inputs().size() - 1;
@@ -57,7 +57,7 @@ static bool isDecomposableNorm(Node* normalize_op) {
return false;
}

RegisterOperators reg_ops(
static RegisterOperators reg_ops(
{Operator(
"aten::_ncf_unsqueeze(Tensor(a) self, int ndim) -> Tensor(a)",
[](Stack& stack) {
@@ -18,7 +18,7 @@

namespace torch::jit {

void removePrintOps(Block* block) {
static void removePrintOps(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end(); it != end;
++it) {
for (auto b : it->blocks()) {
@@ -46,7 +46,7 @@ void RemovePrintOps(std::shared_ptr<Graph>& graph) {
GRAPH_DUMP("After RemovePrintOps: ", graph);
}

void checkONNXCompatibility(const c10::FunctionSchema& schema) {
static void checkONNXCompatibility(const c10::FunctionSchema& schema) {
// in ONNX, all inputs are tensors, no support for tensor list
// so at most one input tensor list is supported
bool has_tensor_list = false;
@@ -74,7 +74,7 @@ void checkONNXCompatibility(const c10::FunctionSchema& schema) {
}
}

void preprocessCaffe2Ops(Block* block) {
static void preprocessCaffe2Ops(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end(); it != end;
++it) {
for (auto b : it->blocks()) {
@@ -246,7 +246,7 @@ py::dict BlockToONNX(
return py::dict();
}

bool ConstantFoldCondition(torch::jit::Value* output) {
static bool ConstantFoldCondition(torch::jit::Value* output) {
auto fold_condition = output->node()->kind() != c10::onnx::Constant &&
ConstantValueMap::HasValue(output->debugName());
auto reliable_value =
@@ -14,7 +14,7 @@ using namespace ::c10::onnx;
// many constant operators would have already been removed in the export before
// this step. On the other hand if cast is inserted in symbolic, subsequent node
// conversion will break if it depends on certain inputs being constant.
void CastAllConstantToFloating(Block* block) {
static void CastAllConstantToFloating(Block* block) {
auto graph = block->owningGraph();
auto it = block->nodes().begin();
while (it != block->nodes().end()) {
@@ -30,7 +30,7 @@ enum OnnxType : int {
ONNX_UINT32,
};

std::unordered_map<int, at::ScalarType> onnxTypeToScalarTypeMap = {
static std::unordered_map<int, at::ScalarType> onnxTypeToScalarTypeMap = {
// Only conversion of ONNX numeric types is included here.
// Unsigned ONNX types are mapped to the next higher signed
// ScalarType type.
@@ -46,7 +46,7 @@ std::unordered_map<int, at::ScalarType> onnxTypeToScalarTypeMap = {
{ONNX_UINT32, at::kLong},
};

void handleNegativeStartEndIndex(
static void handleNegativeStartEndIndex(
int64_t& start,
int64_t& end,
int64_t& axis,
@@ -63,7 +63,7 @@ void handleNegativeStartEndIndex(
}
}

std::optional<at::Tensor> runTorchSlice_opset9(
static std::optional<at::Tensor> runTorchSlice_opset9(
const Node* node,
std::vector<at::Tensor>& inputTensorValues) {
assert(inputTensorValues.size() == 1);
@@ -103,7 +103,7 @@ std::optional<at::Tensor> runTorchSlice_opset9(
return std::optional<at::Tensor>(updated_val);
}

std::optional<at::Tensor> runTorchSlice_opset10(
static std::optional<at::Tensor> runTorchSlice_opset10(
const Node* node,
std::vector<at::Tensor>& inputTensorValues) {
const int maxSliceInputCount = 5;
@@ -198,7 +198,7 @@ std::optional<at::Tensor> runTorchSlice_opset10(
}

// Refer to AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF
at::Tensor runTorchArange_opset11(
static at::Tensor runTorchArange_opset11(
const Node* node,
const std::vector<at::Tensor>& inputTensorValues) {
TORCH_INTERNAL_ASSERT(inputTensorValues.size() == 3);
@@ -542,7 +542,7 @@ std::optional<at::Tensor> runTorchBackendForOnnx(
}
}

bool isConstant(Value* val, const ValueToParamPairMap& valsToParamsMap) {
static bool isConstant(Value* val, const ValueToParamPairMap& valsToParamsMap) {
auto parentNode = val->node();
return (parentNode->kind() == prim::Param &&
valsToParamsMap.find(val) !=
@@ -553,7 +553,7 @@ bool isConstant(Value* val, const ValueToParamPairMap& valsToParamsMap) {
AttributeKind::t); // Check other types?
}

bool hasParamInput(Node* n, const ValueToParamPairMap& valsToParamsMap) {
static bool hasParamInput(Node* n, const ValueToParamPairMap& valsToParamsMap) {
for (auto input : n->inputs()) {
if (valsToParamsMap.find(input) != valsToParamsMap.end()) {
return true;
@@ -562,7 +562,7 @@ bool hasParamInput(Node* n, const ValueToParamPairMap& valsToParamsMap) {
return false;
}

std::vector<at::Tensor> getValues(
static std::vector<at::Tensor> getValues(
Node* node,
const ValueToParamPairMap& valsToParamsMap) {
size_t numInputs = node->inputs().size();
@@ -587,7 +587,7 @@ std::vector<at::Tensor> getValues(
return inputTensorValues;
}

bool areNodeInputsConstant(
static bool areNodeInputsConstant(
Node* node,
const ValueToParamPairMap& valsToParamsMap) {
return std::all_of(
@@ -596,7 +596,7 @@ bool areNodeInputsConstant(
[&valsToParamsMap](Value* v) { return isConstant(v, valsToParamsMap); });
}

std::vector<Node*> getOnnxConstParentsToRemove(Node* node) {
static std::vector<Node*> getOnnxConstParentsToRemove(Node* node) {
std::vector<Node*> parentNodes;
for (auto val : node->inputs()) {
// If the parent of 'node' is an onnx::Constant node,
@@ -619,7 +619,10 @@ std::vector<Node*> getOnnxConstParentsToRemove(Node* node) {
// This is more of a partial evaluation analysis, where operations on constant
// nodes can be lifted so we run them earlier, before the usual parameters are
// known.
void ConstantFoldONNX(Block* b, ParamMap& paramsDict, int opset_version) {
static void ConstantFoldONNX(
Block* b,
ParamMap& paramsDict,
int opset_version) {
if (opset_version < ONNX_OPSET_9) {
TORCH_WARN(
"Constant folding supported for only opsets >= 9. "
@@ -234,7 +234,7 @@ DimSymbolMap& ConstantValueMap::GetDimSymbolMap() {
}

template <typename Map>
void UpdateStrKey(
static void UpdateStrKey(
Map& map,
const std::string& old_key,
const std::string& new_key) {
@@ -10,7 +10,7 @@ namespace onnx {
using namespace ::c10::onnx;
}

void DeduplicateInitializers(
static void DeduplicateInitializers(
std::shared_ptr<Graph>& g,
ValueToParamPairMap& valsToParamsMap,
bool (*comp)(at::Tensor&, at::Tensor&)) {
@@ -62,12 +62,12 @@ void DeduplicateInitializers(
}
}

bool DeduplicateInitializersByDataPtr(at::Tensor& t1, at::Tensor& t2) {
static bool DeduplicateInitializersByDataPtr(at::Tensor& t1, at::Tensor& t2) {
return t1.sizes().equals(t2.sizes()) && t1.strides().equals(t2.strides()) &&
(t1.has_storage() && t2.has_storage() && t1.data_ptr() == t2.data_ptr());
}

bool DeduplicateInitializersByValue(at::Tensor& t1, at::Tensor& t2) {
static bool DeduplicateInitializersByValue(at::Tensor& t1, at::Tensor& t2) {
if (t1.dtype() != t2.dtype() || !t1.sizes().equals(t2.sizes()) ||
!t1.strides().equals(t2.strides())) {
return false;
@@ -12,7 +12,7 @@ namespace onnx {
using namespace ::c10::onnx;
}

std::vector<at::Tensor> getValues(
static std::vector<at::Tensor> getValues(
Node* node,
const ValueToParamPairMap& valsToParamsMap) {
size_t numInputs = node->inputs().size();
@@ -140,7 +140,7 @@ static void fuseConvBatchNorm(Block* b, ValueToParamPairMap& valsToParamsMap) {
}
}

void EvalPeepholeONNX(Block* b, ParamMap& paramsDict) {
static void EvalPeepholeONNX(Block* b, ParamMap& paramsDict) {
auto valsToParamsMap = buildValueToParamsMap(b, paramsDict);
fuseConvBatchNorm(b, valsToParamsMap);
buildParamsMapFromValueToParamsMap(valsToParamsMap, paramsDict);
@@ -347,7 +347,7 @@ void FixupONNXLoopNodeInputs(Node* node, int opset_version) {
}
} // anonymous namespace

std::vector<Value*> FixupONNXLoopNode(Node* node, int opset_version) {
static std::vector<Value*> FixupONNXLoopNode(Node* node, int opset_version) {
auto output_size = node->outputs().size();
GRAPH_DEBUG("before FixupONNXLoopBlockInputs: ", *node->owningGraph());
FixupONNXLoopBlockInputs(node);
@@ -368,7 +368,7 @@ std::vector<Value*> FixupONNXLoopNode(Node* node, int opset_version) {

// Check if node is prim::Uninitialized,
// or output of prim::Uninitialized->onnx::Identity
bool IsUninitializedNode(Node* n) {
static bool IsUninitializedNode(Node* n) {
if (n->kind() == ::c10::onnx::Identity &&
n->inputs()[0]->node()->kind() == prim::Uninitialized)
return true;
@@ -380,7 +380,7 @@ bool IsUninitializedNode(Node* n) {
// Infer shape and type of the uninitialized_output from the corresponding
// output of the other subblock. prim::Uninitialized node is proven to be
// unused. So replace this node with one of the inferred shape and type.
void InferShapeTypeForUninitializedOutput(
static void InferShapeTypeForUninitializedOutput(
Graph* graph,
Block* block,
Value* uninitialized_output,
@@ -456,7 +456,7 @@ void InferShapeTypeForUninitializedOutput(
// -> (%1, %y.1, %7)
// ...

void ONNXFixupUninitializedOutput(Node* node, int opset_version) {
static void ONNXFixupUninitializedOutput(Node* node, int opset_version) {
if (node->kind() != ::c10::onnx::If) {
return;
}
@@ -510,7 +510,7 @@ void ONNXFixupUninitializedOutput(Node* node, int opset_version) {
}
}

void ONNXMergeIfBlockOutputShapes(Node* node) {
static void ONNXMergeIfBlockOutputShapes(Node* node) {
TORCH_INTERNAL_ASSERT(node->kind() == ::c10::onnx::If);
Block* then_block = node->blocks().at(0);
Block* else_block = node->blocks().at(1);
@@ -663,7 +663,7 @@ void ONNXMergeIfBlockOutputShapes(Node* node) {
}
}

std::vector<Value*> FixupONNXIfNode(Node* node, int opset_version) {
static std::vector<Value*> FixupONNXIfNode(Node* node, int opset_version) {
if (node->kind() != ::c10::onnx::If) {
return node->outputs().vec();
}
@@ -1125,20 +1125,6 @@ NodeAttrNameMap ONNXFunctionExtraction(
return fe.run();
}

Node* ONNXGetPreviousScope(std::shared_ptr<Graph>& graph) {
auto* last_node = graph->nodes().back()->prev();
auto* scope_node = NodeOfMostRecentScope(last_node);
auto* attr_node = scope_attr_graph_->create(prim::TracedModuleForward);
attr_node->setScope(scope_node->scope());
TORCH_INTERNAL_ASSERT(
scope_attr_map_.find(scope_node->scope()) == scope_attr_map_.end(),
"Found duplicated scope. Scope ",
scope_node->scope()->namesFromRoot(),
" already processed.");
scope_attr_map_[scope_node->scope()] = attr_node;
return attr_node;
}

void ONNXClearScopeRecords() {
scope_attr_map_.clear();
scope_attr_graph_ = std::make_shared<Graph>();
@@ -240,7 +240,7 @@ Node* transformToONNXConcatNode(
return concat_node;
}

void ONNXLintGraph(
static void ONNXLintGraph(
const Block* b,
std::vector<NodeKind>& n_miss_source_range,
std::vector<NodeKind>& n_miss_scope) {
@@ -22,7 +22,7 @@ using namespace ::c10::onnx;
// ...
// %weight = prim::GetAttr[name="scale"](%B)
// ...
std::deque<std::string> findSubModuleAttr(
static std::deque<std::string> findSubModuleAttr(
Value* input,
std::string& name,
Module& attrModule,
@@ -48,7 +48,10 @@ std::deque<std::string> findSubModuleAttr(
return moduleNames;
}

Value* addParamAsArgument(Function* function, std::string& name, IValue& attr) {
static Value* addParamAsArgument(
Function* function,
std::string& name,
IValue& attr) {
auto schema = function->getSchema();
auto args = schema.arguments();
args.emplace_back(name, nullptr, std::nullopt, attr);
@@ -64,7 +67,7 @@ Value* addParamAsArgument(Function* function, std::string& name, IValue& attr) {
attr.type());
}

std::vector<IValue> getParamAttributes(
static std::vector<IValue> getParamAttributes(
Block* block,
std::shared_ptr<Graph>& graph,
const Module& module_,
@@ -163,7 +166,7 @@ std::vector<IValue> getParamAttributes(
return parameterIValues;
}

void insertMainModuleAsConstant(const std::shared_ptr<Graph>& graph) {
static void insertMainModuleAsConstant(const std::shared_ptr<Graph>& graph) {
auto* constNode = graph->create(prim::CreateObject);
constNode->output()->setType(graph->inputs().at(0)->type());
auto it = graph->nodes().begin();
@@ -6,7 +6,7 @@

namespace torch::jit {

void convertSubgraphToSubBlock(Block* block) {
static void convertSubgraphToSubBlock(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end();
it != end;) {
Node* node = *it++;
@@ -29,12 +29,12 @@ namespace onnx {
using namespace ::c10::onnx;
}

bool isRNN(const Node* node) {
static bool isRNN(const Node* node) {
auto k = node->kind();
return k == onnx::RNN || k == onnx::LSTM || k == onnx::GRU;
}

bool isNopTranspose(const std::vector<int64_t>& perm) {
static bool isNopTranspose(const std::vector<int64_t>& perm) {
for (int64_t i = 0, perm_size = perm.size(); i < perm_size; i++) {
if (perm[i] != i) {
return false;
@@ -52,7 +52,7 @@ bool isNopTranspose(const std::vector<int64_t>& perm) {
// iteration would have folded all the transposes up to that point. Thus,
// `ret[i] = t1[t2[i]]` says "the output of t2 at position i takes the value of
// the input tensor index contained in t1 at position `t2[i]``".
std::vector<int64_t> composeTransposes(
static std::vector<int64_t> composeTransposes(
const std::vector<int64_t>& t1,
const std::vector<int64_t>& t2) {
TORCH_INTERNAL_ASSERT(t1.size() == t2.size());
@@ -65,7 +65,7 @@ std::vector<int64_t> composeTransposes(
return ret;
}

std::vector<size_t> getBroadcastPositions(Node* node) {
static std::vector<size_t> getBroadcastPositions(Node* node) {
// Most of the element-wise ops in ONNX supports numpy broadcasting.
// Only GEMM supports one-directional broadcasting, which broadcasts the bias
// to the product.
@@ -100,7 +100,7 @@ std::vector<size_t> getBroadcastPositions(Node* node) {
// Determine whether `from` can broadcast to `to`, and if so at which
// position. `from` must be a suffix of `to`, except that any
// occurrences of 1 in `from` are treated as wildcards.
std::optional<size_t> fusibleExpandTo(
static std::optional<size_t> fusibleExpandTo(
at::IntArrayRef from,
at::IntArrayRef to) {
if (from.size() > to.size()) {
@@ -122,7 +122,7 @@ std::optional<size_t> fusibleExpandTo(
// easier for non-strided backends to more efficiently do broadcasts if this
// is local information. This optimization is not useful for PyTorch as
// 'expand' is free.
void fuseBroadcast(Block* b) {
static void fuseBroadcast(Block* b) {
for (auto n : b->nodes()) {
for (auto* child_block : n->blocks()) {
fuseBroadcast(child_block);
@@ -179,7 +179,7 @@ void fuseBroadcast(Block* b) {
}
}

void fuseConsecutiveTransposes(Block* b) {
static void fuseConsecutiveTransposes(Block* b) {
for (auto n : b->nodes()) {
for (auto* child_block : n->blocks()) {
fuseConsecutiveTransposes(child_block);
@@ -201,7 +201,7 @@ void fuseConsecutiveTransposes(Block* b) {
}
}

void eliminateNopTranspose(Block* b) {
static void eliminateNopTranspose(Block* b) {
for (auto it = b->nodes().begin(), end = b->nodes().end(); it != end; ++it) {
auto n = *it;
for (auto* child_block : n->blocks()) {
@@ -217,7 +217,7 @@ void eliminateNopTranspose(Block* b) {
}
}

void fuseTransposeIntoGemm(Block* b) {
static void fuseTransposeIntoGemm(Block* b) {
static const std::vector<int64_t> simpleTransPerm({1, 0});

for (auto n : b->nodes()) {
@@ -257,7 +257,7 @@ void fuseTransposeIntoGemm(Block* b) {
// the removeNopPacking pass removes the packing operations
// entirely by pairing them with their inverse PadPacked. If the
// input graph does not pair the operations, export will fail.
void pushPackingPastRnn(Block* b) {
static void pushPackingPastRnn(Block* b) {
for (auto it = b->nodes().begin(); it != b->nodes().end(); ++it) {
auto* n = *it;
for (auto* child_block : n->blocks()) {
@@ -396,7 +396,7 @@ void pushPackingPastRnn(Block* b) {
// Despite the name, this actually removes the PadPacked node and leaves
// the PackPadded node. The PackPadded should become dead code which will
// be eliminated later.
void removeNopPacking(Block* graph) {
static void removeNopPacking(Block* graph) {
for (auto it = graph->nodes().begin(); it != graph->nodes().end(); ++it) {
auto* n = *it;
for (auto* child_block : n->blocks()) {
@@ -424,7 +424,7 @@ void removeNopPacking(Block* graph) {
}
}

void hackFixupPadPackedShapes(Block* graph) {
static void hackFixupPadPackedShapes(Block* graph) {
// FIXME: the shape of the input to the fictional PadPacked node has
// incorrect shape. For now, just copy the shape of PadPacked to the shape
// of its input.
@@ -442,7 +442,7 @@ void hackFixupPadPackedShapes(Block* graph) {
}
}

void fixDefaultRNNState(
static void fixDefaultRNNState(
Graph* graph,
Node* n,
int input_index,
@@ -535,7 +535,7 @@ void fixDefaultRNNState(
}
}

void fixDefaultRnnHiddenState(Block* b, int opset_version) {
static void fixDefaultRnnHiddenState(Block* b, int opset_version) {
for (auto it = b->nodes().begin(); it != b->nodes().end(); ++it) {
auto* n = *it;
for (auto* child_block : n->blocks()) {
@@ -554,7 +554,7 @@ void fixDefaultRnnHiddenState(Block* b, int opset_version) {
}
}

void fixDefaultLstmCellState(Block* b, int opset_version) {
static void fixDefaultLstmCellState(Block* b, int opset_version) {
for (auto it = b->nodes().begin(); it != b->nodes().end(); ++it) {
auto* n = *it;
for (auto* child_block : n->blocks()) {
@@ -791,7 +791,7 @@ static void eraseTupleConstruct(Block* block) {
}
}

void removeMaxPoolUnusedOutput(Block* b) {
static void removeMaxPoolUnusedOutput(Block* b) {
for (auto it = b->nodes().begin(), end = b->nodes().end(); it != end; ++it) {
auto n = *it;
for (auto* child_block : n->blocks()) {
@@ -22,7 +22,7 @@

namespace torch::jit {

inline bool PyNone_Check(PyObject* o) {
static inline bool PyNone_Check(PyObject* o) {
return o == Py_None;
}

@@ -2027,7 +2027,7 @@ void UpdateReliable(Node* n) {
// Traverse the graph inputs and compute reliability (e.g., are shapes static).
// Since the inputs do not change during export, we save computation time by
// marking it as computed and subsequently skipping.
void SetGraphInputTypeReliable(const Graph* g) {
static void SetGraphInputTypeReliable(const Graph* g) {
if (!ConstantValueMap::GetAllGraphInputsReliableComputed()) {
for (auto graph_input : g->inputs()) {
if (!ConstantValueMap::HasTypeReliable(graph_input->debugName())) {
@@ -2255,7 +2255,7 @@ void ONNXSetDynamicInputShape(
}
}

bool HasSequenceTypeOutput(Node* node) {
static bool HasSequenceTypeOutput(Node* node) {
if (node->kind() == ::c10::onnx::SplitToSequence ||
node->kind() == ::c10::onnx::SequenceInsert ||
node->kind() == ::c10::onnx::SequenceEmpty ||
@@ -2266,7 +2266,7 @@ bool HasSequenceTypeOutput(Node* node) {
return false;
}

void ONNXUpdateTypeFromTensor(
static void ONNXUpdateTypeFromTensor(
Value* graph_output,
const at::Tensor& output,
bool onnx_shape_inference) {
@@ -2282,7 +2282,7 @@ void ONNXUpdateTypeFromTensor(
// into flattened graph outputs. `outputs_index` is passed in to point to the
// current index in flattened graph outputs. The updated `outputs_index` is
// returned at the end of the function.
size_t ONNXAssignOutputShape(
static size_t ONNXAssignOutputShape(
std::shared_ptr<Graph>& graph,
size_t outputs_index,
PyObject* output_obj,
@@ -98,7 +98,7 @@ double getScaleFromInput(Node* input_node) {
input_name);
}

std::vector<Node*> CreateQuantizedWeights(
static std::vector<Node*> CreateQuantizedWeights(
std::shared_ptr<Graph>& graph,
const at::Tensor& weight,
int8_t* data,
@@ -191,7 +191,7 @@ std::vector<Node*> CreateQuantizedWeights(
return {data_node, scale_node, zero_point_node, axis_node};
}

Node* CreateQuantizedBias(
static Node* CreateQuantizedBias(
std::vector<float> data,
std::shared_ptr<Graph>& graph,
const std::vector<int64_t>& shapes) {
@@ -206,7 +206,7 @@ Node* CreateQuantizedBias(
return const_node_1;
}

Node* createIntTuple(
static Node* createIntTuple(
const std::vector<int64_t>& is,
std::shared_ptr<Graph>& graph) {
Node* const_node = graph->create(Symbol::onnx("Constant"));
@@ -214,13 +214,13 @@ Node* createIntTuple(
return const_node;
}

Node* createInt(int64_t i, std::shared_ptr<Graph>& graph) {
static Node* createInt(int64_t i, std::shared_ptr<Graph>& graph) {
Node* const_node = graph->create(Symbol::onnx("Constant"));
const_node->i_(Symbol::attr("value"), i);
return const_node;
}

void ConvertQuantizedWeight(
static void ConvertQuantizedWeight(
std::shared_ptr<Graph>& graph,
Node* node,
at::Tensor& weight) {
@@ -254,7 +254,7 @@ enum class QuantizedParamsType { CONV1D, CONV, LINEAR };
// passed to the appropriate unpack function using c10::Dispatcher. We insert
// the unpacked weights and bias into the graph using
// caffe2::Int8GivenTensorFill nodes.
void unpackQuantizedWeightsHelper(
static void unpackQuantizedWeightsHelper(
std::shared_ptr<Graph>& graph,
std::map<std::string, IValue>& paramsDict,
const std::string& pattern,
@@ -547,7 +547,7 @@ static std::

// Unpack quantized tensor inputs into {value, scale, zero_point},
// Then create a prim::TupleConstruct node based on these three values.
void UnpackQuantizedTensorInputs(std::shared_ptr<Graph>& graph) {
static void UnpackQuantizedTensorInputs(std::shared_ptr<Graph>& graph) {
for (size_t index = 0; index < graph->inputs().size();) {
auto g_input = graph->inputs()[index];
TensorTypePtr shape_type = g_input->type()->cast<TensorType>();
@@ -707,7 +707,7 @@ void UnpackQuantizedWeights(
// Caffe2 expects quantized ops to be in NHWC format while pytorch inputs are in
// NCHW. This pass inserts permutes to convert from NCHW to NHWC before each
// conv op and add another permute from NHWC to NCHW after the conv op.
void insertPermutesHelper(
static void insertPermutesHelper(
std::shared_ptr<Graph>& graph,
std::map<std::string, IValue>& paramsDict,
const std::string& pattern) {
@@ -18,7 +18,7 @@ using AtenFuncArgs = std::vector<FuncArg>;
using CallFuncArgs = std::vector<FuncArg>;

// Lists of allowed quantizable operators
std::vector<std::string> _static_quantizable_call_funcs = {
static std::vector<std::string> _static_quantizable_call_funcs = {
"conv2d",
"linear",
"batch_norm",
@@ -31,7 +31,7 @@ std::vector<std::string> _static_quantizable_call_funcs = {
"embedding_bag",
};

std::vector<std::string> _static_quantizable_aten_funcs = {
static std::vector<std::string> _static_quantizable_aten_funcs = {
"conv1d",
"conv2d",
"conv3d",
@@ -51,18 +51,18 @@ std::vector<std::string> _static_quantizable_aten_funcs = {
"embedding_bag",
};

std::vector<std::string> _dynamic_quantizable_call_funcs = {
static std::vector<std::string> _dynamic_quantizable_call_funcs = {
"linear",
};

std::vector<std::string> _dynamic_quantizable_aten_funcs = {
static std::vector<std::string> _dynamic_quantizable_aten_funcs = {
"linear",
};

std::vector<std::string> _static_weight_only_quant_aten_funcs = {
static std::vector<std::string> _static_weight_only_quant_aten_funcs = {
"embedding_bag",
};
std::vector<std::string> _static_weight_only_quant_call_funcs = {
static std::vector<std::string> _static_weight_only_quant_call_funcs = {
"embedding_bag",
};

@@ -73,7 +73,7 @@ std::vector<std::string> _static_weight_only_quant_call_funcs = {
// output of the `prim::CallFunction`
// Also these ops doesn't do computation on the value of Tensor, the
// operation only depends on the shape of the Tensor
std::vector<std::string> _single_input_general_shape_call_funcs = {
static std::vector<std::string> _single_input_general_shape_call_funcs = {
"_max_pool1d",
"_max_pool2d",
"_max_pool3d",
@@ -86,7 +86,7 @@ std::vector<std::string> _single_input_general_shape_call_funcs = {
// Also these ops doesn't do computation on the value of Tensor, the
// operation only depends on the shape of the Tensor
// e.g. `aten::flatten(%input_tensor, ...)`
std::vector<std::string> _single_input_general_shape_aten_funcs = {
static std::vector<std::string> _single_input_general_shape_aten_funcs = {
"max_pool1d",
"max_pool2d",
"max_pool3d",
@@ -121,7 +121,7 @@ std::vector<std::string> _single_input_general_shape_aten_funcs = {
// Also these ops do computation on the value of Tensor
// TODO: [Need verify] looks like we can quantize simple functionals that just
// call into aten functions
std::vector<std::string> _single_input_general_value_call_funcs = {
static std::vector<std::string> _single_input_general_value_call_funcs = {
"avg_pool1d",
"avg_pool2d",
"avg_pool3d",
@@ -140,7 +140,7 @@ std::vector<std::string> _single_input_general_value_call_funcs = {
// have a single input Tensor
// Also these ops do computation on the value of Tensor
// e.g. `aten::avg_pool2d(%input_tensor, ...)`
std::vector<std::string> _single_input_general_value_aten_funcs = {
static std::vector<std::string> _single_input_general_value_aten_funcs = {
"avg_pool1d",
"avg_pool2d",
"avg_pool3d",
@@ -163,7 +163,7 @@ std::vector<std::string> _single_input_general_value_aten_funcs = {
"leaky_relu_",
};

std::vector<std::string> _clamp_funcs = {
static std::vector<std::string> _clamp_funcs = {
"hardtanh",
"hardtanh_",
"clamp",
@@ -176,7 +176,7 @@ const float _sym_scale = 2.0f / 256.0f;
const int _sym_zero_point = 128;
// quantization parameters for ops with range 0 to 1
// for example: aten/src/ATen/native/quantized/cpu/qsigmoid.cpp
std::tuple<c10::QScheme, QParamVector> _per_tensor_asym_qparam =
static std::tuple<c10::QScheme, QParamVector> _per_tensor_asym_qparam =
std::make_tuple(
c10::kPerTensorAffine,
QParamVector(
@@ -186,16 +186,17 @@ std::tuple<c10::QScheme, QParamVector> _per_tensor_asym_qparam =

// quantization parameters for ops with range -1 to 1
// for example: aten/src/ATen/native/quantized/cpu/qtanh.cpp
std::tuple<c10::QScheme, QParamVector> _per_tensor_sym_qparam = std::make_tuple(
c10::kPerTensorAffine,
QParamVector(
{std::make_pair(".scale", IValue(_sym_scale)),
std::make_pair(".zero_point", IValue(_sym_zero_point)),
std::make_pair(".scalar_type", IValue(c10::kQUInt8))}));
static std::tuple<c10::QScheme, QParamVector> _per_tensor_sym_qparam =
std::make_tuple(
c10::kPerTensorAffine,
QParamVector(
{std::make_pair(".scale", IValue(_sym_scale)),
std::make_pair(".zero_point", IValue(_sym_zero_point)),
std::make_pair(".scalar_type", IValue(c10::kQUInt8))}));

// Map from aten op symbol to the quantization parameters
// for the ops with fixed quantization parameters
std::unordered_map<NodeKind, std::tuple<c10::QScheme, QParamVector>>
static std::unordered_map<NodeKind, std::tuple<c10::QScheme, QParamVector>>
_fixed_qparams_map = {
{Symbol::aten("hardsigmoid"), _per_tensor_asym_qparam},
{Symbol::aten("hardsigmoid_"), _per_tensor_asym_qparam},
@@ -208,22 +209,26 @@ std::unordered_map<NodeKind, std::tuple<c10::QScheme, QParamVector>>
// Special checks for ops that do not require observers for all input tensors.
// For each operator in this list observers are inserted for the input based
// on the index specified.
AtenFuncArgs _observe_inputs_aten_func = {};
CallFuncArgs _observe_inputs_call_func = {{"batch_norm", 1}};
static AtenFuncArgs _observe_inputs_aten_func = {};
static CallFuncArgs _observe_inputs_call_func = {{"batch_norm", 1}};

// Aten functions for getting tensor information
std::vector<std::string> _tensor_info_funcs = {"size", "len", "dim", "numel"};
static std::vector<std::string> _tensor_info_funcs = {
"size",
"len",
"dim",
"numel"};

// Aten functions whose output will be quantized or not quantized depending
// on input tensor
std::vector<std::string> _propagate_quant_single_input_ops = {"cat"};
static std::vector<std::string> _propagate_quant_single_input_ops = {"cat"};

// Rules are slightly different for binary ops like `aten::add`, for these ops,
// if both of the inputs are Tensor, we'll quantize the output only if both of
// the inputs are quantized
// if the second input is a Scalar, we'll only look at the first input to decide
// if we need to quantize the output
std::vector<std::string> _propagate_quant_binary_ops = {
static std::vector<std::string> _propagate_quant_binary_ops = {
"add",
"add_",
"mul",
@@ -69,7 +69,7 @@ static std::map<int64_t, Value*> InsertSymbolicShapesCompute(
return sym_shape_to_enclosing_graph_value;
}

void insertDynamicShapesGuard(
static void insertDynamicShapesGuard(
const ShapeComputeGraphMapping& shape_mapping,
Node* guarded_node,
bool add_composed_op,
@@ -115,7 +115,7 @@ StrideInput strideInputFromString(const std::string& si) {
// in the runtime guard, strides are serialized as one flat
// vector. stride_inputs_offset indexes into that vector
// where the strides of this tensor begin
inline StrideInput summarizeStrideDim(
static inline StrideInput summarizeStrideDim(
const c10::IntArrayRef sizes,
const c10::IntArrayRef strides,
size_t dim,
@@ -517,7 +517,7 @@ static Operation StaticRuntimeCopyOuts(const Node* node) {
};
}

RegisterOperators SRCopyOuts({
static RegisterOperators SRCopyOuts({
torch::jit::Operator(
prim::StaticRuntimeCopyOuts,
StaticRuntimeCopyOuts,
@@ -529,7 +529,7 @@ RegisterOperators SRCopyOuts({
// and also the that the symbolic shape dimensions are observed.
// For any symbolic dimension we need to set its value on its first
// use and for all subsequent uses check that the values are equal
RegisterOperators reg_guard({
static RegisterOperators reg_guard({
Operator(
"prim::TensorExprDynamicGuard(...) -> bool",
[](const Node* node) -> Operation {
@@ -736,7 +736,7 @@ static Operation createTensorExprDynamicGroup(const Node* node) {
};
}

RegisterOperators TensorExprDynamicOp({
static RegisterOperators TensorExprDynamicOp({
torch::jit::Operator(
prim::TensorExprDynamicGroup,
createTensorExprDynamicGroup,
@@ -1436,7 +1436,7 @@ static Operation createTensorExprOp(const Node* node) {
};
}

RegisterOperators TensorExprOps({
static RegisterOperators TensorExprOps({
torch::jit::Operator(
prim::TensorExprGroup,
createTensorExprOp,
@@ -77,6 +77,7 @@
#include <torch/csrc/jit/passes/utils/check_alias_annotation.h>
#include <torch/csrc/jit/passes/vulkan_rewrite.h>
#include <torch/csrc/jit/passes/xnnpack_rewrite.h>
#include <torch/csrc/jit/python/init.h>
#include <torch/csrc/jit/python/pybind_utils.h>
#include <torch/csrc/jit/python/python_arg_flatten.h>
#include <torch/csrc/jit/python/python_custom_class.h>
@@ -59,7 +59,7 @@ void clear_registered_instances(void* ptr) {
// SymIntList is in fact only ints, and if so, you called this with T=int64_t.
// This precondition is NOT checked at runtime.
template <typename T>
IValue listToIValue(py::handle obj) {
static IValue listToIValue(py::handle obj) {
c10::List<T> rs;
for (auto it = obj.begin(); it != obj.end(); it++) {
auto elm = *it;
@ -26,13 +26,13 @@
|
||||
namespace torch::jit {
|
||||
|
||||
// Controls whether graph source ranges are printed by default
|
||||
bool global_print_source_ranges = true;
|
||||
static bool global_print_source_ranges = true;
|
||||
|
||||
Symbol ConcretePythonOp::Kind = prim::PythonOp;
|
||||
|
||||
using c10::Type;
|
||||
|
||||
std::string getPythonName(const PyObject* obj_) {
|
||||
static std::string getPythonName(const PyObject* obj_) {
|
||||
pybind11::gil_scoped_acquire gil;
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
|
||||
PyObject* obj = const_cast<PyObject*>(obj_);
|
||||
@ -41,7 +41,7 @@ std::string getPythonName(const PyObject* obj_) {
|
||||
return py::str(v);
|
||||
}
|
||||
|
||||
std::ostream& printPyObject(std::ostream& out, const THPObjectPtr& obj) {
|
||||
static std::ostream& printPyObject(std::ostream& out, const THPObjectPtr& obj) {
|
||||
pybind11::gil_scoped_acquire gil;
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
|
||||
auto pyobj = py::handle(const_cast<PyObject*>(obj.get()));
|
||||
@ -81,7 +81,7 @@ std::ostream& printPyObject(std::ostream& out, const THPObjectPtr& obj) {
|
||||
}
|
||||
}
|
||||
|
||||
Node* findNode(
|
||||
static Node* findNode(
|
||||
c10::ArrayRef<torch::jit::Block*> blocks,
|
||||
Symbol kind,
|
||||
bool recurse = true) {
|
||||
@ -101,7 +101,7 @@ Node* findNode(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* findNode(Block* block, Symbol kind, bool recurse = true) {
|
||||
static Node* findNode(Block* block, Symbol kind, bool recurse = true) {
|
||||
std::vector<Block*> blocks = {block};
|
||||
return findNode(blocks, kind, recurse);
|
||||
}
|
||||
|
@ -381,7 +381,7 @@ SugaredValuePtr ModuleValue::getitem(
|
||||
<< "ParameterList, and ParameterDict modules are subscriptable");
|
||||
}
|
||||
|
||||
void checkInterface(
|
||||
static void checkInterface(
|
||||
const SourceRange& loc,
|
||||
GraphFunction& m,
|
||||
const std::shared_ptr<ModuleValue>& self,
|
||||
@ -582,7 +582,7 @@ std::shared_ptr<SugaredValue> SugaredDict::attr(
|
||||
TORCH_INTERNAL_ASSERT(false);
|
||||
}
|
||||
|
||||
std::shared_ptr<SugaredEnumClass> createSugaredEnumClassFromObj(
|
||||
static std::shared_ptr<SugaredEnumClass> createSugaredEnumClassFromObj(
|
||||
const py::object& obj,
|
||||
GraphFunction& m,
|
||||
const SourceRange& loc) {
|
||||
@ -595,7 +595,7 @@ std::shared_ptr<SugaredEnumClass> createSugaredEnumClassFromObj(
|
||||
}
|
||||
|
||||
// helper function for instantiating a SugaredValue from an IValue
|
||||
std::shared_ptr<SugaredValue> toSugaredValue(
|
||||
static std::shared_ptr<SugaredValue> toSugaredValue(
|
||||
const IValue& v,
|
||||
GraphFunction& m,
|
||||
const SourceRange& loc) {
|
||||
@ -1057,7 +1057,7 @@ TypePtr registerNamedTuple(
|
||||
return tt;
|
||||
}
|
||||
|
||||
bool isEnumClass(py::object obj) {
|
||||
static bool isEnumClass(py::object obj) {
|
||||
auto enum_type_obj =
|
||||
py::cast<py::object>(py::module::import("enum").attr("Enum"));
|
||||
int ret = PyObject_IsSubclass(obj.ptr(), enum_type_obj.ptr());
|
||||
@ -1068,7 +1068,7 @@ bool isEnumClass(py::object obj) {
|
||||
return ret == 1;
|
||||
}
|
||||
|
||||
std::shared_ptr<SugaredValue> createSimpleEnumValue(
|
||||
static std::shared_ptr<SugaredValue> createSimpleEnumValue(
|
||||
const py::object& obj,
|
||||
GraphFunction& m,
|
||||
const SourceRange& loc) {
|
||||
|
@ -22,7 +22,7 @@ namespace torch::jit::tracer {

// Python interpreter retrieval routine adapted from
// https://stackoverflow.com/a/8706144
std::vector<StackEntry> _pythonCallstack() {
static std::vector<StackEntry> _pythonCallstack() {
pybind11::gil_scoped_acquire gil;
PyFrameObject* frame = PyEval_GetFrame();
Py_XINCREF(frame);
@ -196,11 +196,11 @@ Node* preRecordPythonTrace(
return n;
}

void pythonRecordSourceLocation(Node* n) {
static void pythonRecordSourceLocation(Node* n) {
n->setSourceRange(getPythonInterpreterSourceRange());
}

void pythonWarn(const std::string& reason) {
static void pythonWarn(const std::string& reason) {
pybind11::gil_scoped_acquire gil;
auto warn_class = py::module::import("torch.jit").attr("TracerWarning");
PyErr_WarnEx(warn_class.ptr(), reason.c_str(), 1);

@ -12,7 +12,7 @@ namespace py = pybind11;

namespace torch::jit {

std::optional<std::string> maybeConvertToString(const py::object& obj) {
static std::optional<std::string> maybeConvertToString(const py::object& obj) {
if (obj.is_none()) {
return std::nullopt;
}
@ -58,14 +58,16 @@ struct SourceRangeFactory {
};

template <typename T>
List<T> wrap_list(const SourceRange& fallback_pos, std::vector<T>&& vec) {
static List<T> wrap_list(
const SourceRange& fallback_pos,
std::vector<T>&& vec) {
if (vec.empty())
return List<T>::create(fallback_pos, std::move(vec));
return List<T>::create(vec.front().range(), std::move(vec));
}

template <typename T>
Maybe<T> wrap_maybe(const SourceRange& fallback_pos, T* val) {
static Maybe<T> wrap_maybe(const SourceRange& fallback_pos, T* val) {
return val ? Maybe<T>::create(val->range(), *val)
: Maybe<T>::create(fallback_pos);
}

@ -246,7 +246,7 @@ FunctionDefaults calcOverloadedFunctionDefaults(

} // namespace

bool checkMutableFunctionDefault(const py::object& def_arg) {
static bool checkMutableFunctionDefault(const py::object& def_arg) {
if (py::isinstance<py::list>(def_arg) || py::isinstance<py::dict>(def_arg)) {
return true;
}
@ -262,7 +262,7 @@ bool checkMutableFunctionDefault(const py::object& def_arg) {
return false;
}

void checkMutableFunctionDefault(
static void checkMutableFunctionDefault(
const SourceRange& range,
const Argument& arg,
const py::object& def_arg) {
@ -276,7 +276,7 @@ void checkMutableFunctionDefault(
}
}

FunctionSchema getSchemaWithNameAndDefaults(
static FunctionSchema getSchemaWithNameAndDefaults(
const SourceRange& range,
const FunctionSchema& schema,
const std::optional<std::string>& new_name,
@ -472,7 +472,7 @@ static std::shared_ptr<Graph> _propagate_and_assign_input_shapes(
return retval;
}

void addFunctionToModule(Module& module, const StrongFunctionPtr& func) {
static void addFunctionToModule(Module& module, const StrongFunctionPtr& func) {
// Make a graph with a fake self argument
auto graph = toGraphFunction(*func.function_).graph()->copy();
auto v = graph->insertInput(0, "self");
@ -484,7 +484,7 @@ void addFunctionToModule(Module& module, const StrongFunctionPtr& func) {
}

// this is used in our test suite to check that we correctly preserved type tags
bool ivalue_tags_match(const Module& lhs, const Module& rhs) {
static bool ivalue_tags_match(const Module& lhs, const Module& rhs) {
struct Work {
IValue a;
IValue b;
@ -605,7 +605,7 @@ struct slot_dict_impl {
};

template <typename T>
py::list debugMakeList(const T& list) {
static py::list debugMakeList(const T& list) {
py::list result;
for (const auto& elem : list) {
result.append(py::cast(elem));
@ -613,7 +613,7 @@ py::list debugMakeList(const T& list) {
return result;
}
template <typename T>
py::list debugMakeNamedList(const T& list) {
static py::list debugMakeNamedList(const T& list) {
py::list result;
for (auto elem : list) {
result.append(py::cast(std::make_pair(elem.name, elem.value)));
@ -621,7 +621,7 @@ py::list debugMakeNamedList(const T& list) {
return result;
}
template <typename T>
py::set debugMakeSet(const T& list) {
static py::set debugMakeSet(const T& list) {
py::set result;
for (const auto& elem : list) {
result.add(py::cast(elem));
@ -674,7 +674,7 @@ struct DeepCopyMemoTable {
std::shared_ptr<IValue::HashIdentityIValueMap> map;
};

IValue pyIValueDeepcopy(const IValue& ivalue, const py::dict& memo) {
static IValue pyIValueDeepcopy(const IValue& ivalue, const py::dict& memo) {
if (!memo.contains(py::str("__torch_script_memo_table"))) {
memo["__torch_script_memo_table"] =
DeepCopyMemoTable{std::make_shared<IValue::HashIdentityIValueMap>()};
@ -684,7 +684,7 @@ IValue pyIValueDeepcopy(const IValue& ivalue, const py::dict& memo) {
return ivalue.deepcopy(ivalue_memo);
}

ExtraFilesMap extra_files_from_python(const py::dict& pydict) {
static ExtraFilesMap extra_files_from_python(const py::dict& pydict) {
ExtraFilesMap r;
for (const auto& it : pydict) {
r[py::cast<std::string>(it.first)] = "";
@ -692,14 +692,16 @@ ExtraFilesMap extra_files_from_python(const py::dict& pydict) {
return r;
}

void extra_files_to_python(const ExtraFilesMap& m, const py::dict& pydict) {
static void extra_files_to_python(
const ExtraFilesMap& m,
const py::dict& pydict) {
// py::dict is pointer-like type so it gets modified despite const&
for (const auto& it : m) {
pydict[py::str(it.first)] = py::bytes(it.second);
}
}

void pyCompilationUnitDefine(
static void pyCompilationUnitDefine(
CompilationUnit& cu,
const std::string& src,
const ResolutionCallback* rcb,

@ -2,7 +2,7 @@

namespace torch::jit {

thread_local bool kOptimize = true;
static thread_local bool kOptimize = true;
void setGraphExecutorOptimize(bool o) {
kOptimize = o;
}

@ -165,8 +165,8 @@ struct JitDecomp final : torch::autograd::impl::JitDecompInterface {
torch::jit::Stack* stack) const override;
};

JitDecomp jitDecomp;
torch::autograd::impl::JitDecompRegisterer registerJitDecomp(&jitDecomp);
static JitDecomp jitDecomp;
static torch::autograd::impl::JitDecompRegisterer registerJitDecomp(&jitDecomp);

void JitDecomp::run_jit_decomposition(
const c10::OperatorHandle& op,

@ -82,7 +82,7 @@ c10::AliasAnalysisKind aliasAnalysisInternalSpecialCase() {
// for debugging it is helpful to be able to force autodiff subgraphs
// to be created, to check their correctness, even when the
// size of the of the subgraph is too small to be profitable.
thread_local bool autodiff_subgraph_inlining = true;
static thread_local bool autodiff_subgraph_inlining = true;
void debugSetAutodiffSubgraphInlining(bool state) {
autodiff_subgraph_inlining = state;
}
@ -102,7 +102,7 @@ bool getFusionGroupInlining() {
return fusion_group_inlining;
}

thread_local std::weak_ptr<Graph> last_executed_optimized_graph;
static thread_local std::weak_ptr<Graph> last_executed_optimized_graph;
std::shared_ptr<Graph> lastExecutedOptimizedGraph() {
return last_executed_optimized_graph.lock();
}
@ -542,7 +542,7 @@ Gradient getGradient(const Node* n) {
}
} // anonymous namespace

RegisterOperators reg_graph_executor_ops({Operator(
static RegisterOperators reg_graph_executor_ops({Operator(
prim::DifferentiableGraph,
[](const Node* n) -> Operation {
return DifferentiableGraphOp(getGradient(n));

@ -106,7 +106,7 @@ inline int64_t getDistAutogradContextId() {
}
} // namespace

thread_local InterpreterStateImpl* tls_int_state_ptr_ = nullptr;
static thread_local InterpreterStateImpl* tls_int_state_ptr_ = nullptr;
struct TLSCurrentInterpreterGuard {
TLSCurrentInterpreterGuard(InterpreterStateImpl* state)
: prev_state_(tls_int_state_ptr_) {

@ -42,7 +42,7 @@ void LockingLogger::setAggregationType(
agg_types[stat_name] = type;
}

std::atomic<LoggerBase*> global_logger{new NoopLogger()};
static std::atomic<LoggerBase*> global_logger{new NoopLogger()};

LoggerBase* getLogger() {
return global_logger.load();

@ -16,7 +16,7 @@

namespace torch::jit {

std::string shape_funcs = ""
static std::string shape_funcs = ""
+ std::string(R"=====(
def unary(self: List[int]) -> List[int]:
out = annotate(List[int], [])

@ -19,7 +19,7 @@

namespace torch::jit {

void createFusionGroups(Block* block, AliasDb* aliasDb, size_t min_size);
static void createFusionGroups(Block* block, AliasDb* aliasDb, size_t min_size);

void fuseStaticSubgraphs(std::shared_ptr<Graph> graph, size_t min_size) {
Inline(*graph);
@ -60,7 +60,7 @@ static Operation createStaticSubgraphRuntime(const Node* node) {
};
}

RegisterOperators StaticSubgraphOps({torch::jit::Operator(
static RegisterOperators StaticSubgraphOps({torch::jit::Operator(
prim::StaticSubgraph,
createStaticSubgraphRuntime,
AliasAnalysisKind::INTERNAL_SPECIAL_CASE)});

@ -956,7 +956,7 @@ std::vector<std::string> export_opnames(const script::Module& m) {
// Thread local flag (only happens in export, i.e. on server side)
// to control if instructions for bytecode default inputs are emitted
// or not. It's the major difference between bytecode v5 and v6.
thread_local bool emitBytecodeDefaultInputs =
static thread_local bool emitBytecodeDefaultInputs =
caffe2::serialize::kProducedBytecodeVersion <= 5 ? true : false;
bool BytecodeEmitMode::is_default_value_for_unspecified_arg_enabled() {
return emitBytecodeDefaultInputs;
@ -966,7 +966,7 @@ void BytecodeEmitMode::set_default_value_for_unspecified_arg_enabled(
emitBytecodeDefaultInputs = enabled;
}

thread_local bool emitDefautlArgsWithOutArgs =
static thread_local bool emitDefautlArgsWithOutArgs =
caffe2::serialize::kProducedBytecodeVersion <= 6 ? false : true;
bool BytecodeEmitMode::is_default_args_before_out_args_enabled() {
return emitDefautlArgsWithOutArgs;
@ -975,7 +975,7 @@ void BytecodeEmitMode::set_default_args_before_out_args_enabled(bool enabled) {
emitDefautlArgsWithOutArgs = enabled;
}

thread_local bool emitDefaultEmitPromotedOps =
static thread_local bool emitDefaultEmitPromotedOps =
caffe2::serialize::kProducedBytecodeVersion <= 7 ? false : true;
bool BytecodeEmitMode::is_emit_promoted_ops_enabled() {
return emitDefaultEmitPromotedOps;

@ -13,7 +13,7 @@ namespace torch::jit {
// "Whether to emit compact debug_pkl when saving a model to .pt file."
// "Compact file is smaller but cannot be loaded by old torch binaries."
// TODO(qihan) remove when all binaries are using string table.
thread_local bool should_use_format_with_string_table_ = true;
static thread_local bool should_use_format_with_string_table_ = true;

class SourceRangeSerializer {
public:

@ -270,18 +270,6 @@ void Unpickler::setInput(size_t memo_id) {
}
}

// emplace_back on bool vectors does not exist on some systems
// avoid it by calling push_back for bool
template <typename T>
inline void append(std::vector<T>& a, T&& e) {
a.emplace_back(std::forward<T>(e));
}
template <>
// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
inline void append<bool>(std::vector<bool>& a, bool&& e) {
a.push_back(e);
}

static std::vector<int64_t> tupleToIntList(const IValue& v) {
return fmap(v.toTupleRef().elements(), [](const IValue& v) -> int64_t {
return v.toInt();
@ -1189,7 +1177,7 @@ void Unpickler::readList(IValue list_ivalue) {
readListElements(std::move(list_ivalue), start);
}

inline bool is_valid_python_id_char(char c) {
static inline bool is_valid_python_id_char(char c) {
return c == '_' || c == '.' || (c >= '0' && c <= '9') ||
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

@ -363,6 +363,6 @@ void BlockCodeGen::call_raw(const std::vector<void*>& args) {
}

BlockCodeGen::~BlockCodeGen() = default;
RegisterCodeGen<BlockCodeGen> block_codegen_reg("block_codegen");
static RegisterCodeGen<BlockCodeGen> block_codegen_reg("block_codegen");

} // namespace torch::jit::tensorexpr

@ -18,7 +18,7 @@ namespace torch::jit::tensorexpr {
using namespace analysis;

template <typename Container>
BoundsInfo mergeTensorAccesses(
static BoundsInfo mergeTensorAccesses(
const Container& accesses,
const std::unordered_map<VarPtr, BufPtr>& varToBuf,
bool distinctAccessKinds) {

@ -77,7 +77,7 @@ void CppPrinter::printPrologue() {
}

template <typename T>
inline std::enable_if_t<!std::is_floating_point_v<T>, void> visit_mod(
static inline std::enable_if_t<!std::is_floating_point_v<T>, void> visit_mod(
std::ostream& os,
const ExprPtr& lhs,
const ExprPtr& rhs) {
@ -85,7 +85,7 @@ inline std::enable_if_t<!std::is_floating_point_v<T>, void> visit_mod(
}

template <typename T>
inline std::enable_if_t<std::is_floating_point_v<T>, void> visit_mod(
static inline std::enable_if_t<std::is_floating_point_v<T>, void> visit_mod(
std::ostream& os,
const ExprPtr& lhs,
const ExprPtr& rhs) {
@ -93,35 +93,35 @@ inline std::enable_if_t<std::is_floating_point_v<T>, void> visit_mod(
}

template <typename T>
inline std::
static inline std::
enable_if_t<std::is_floating_point_v<T> || std::is_integral_v<T>, void>
visit_max(std::ostream& os, const ExprPtr& lhs, const ExprPtr& rhs) {
os << "std::max(" << *lhs << ", " << *rhs << ")";
}

template <typename T>
inline std::
static inline std::
enable_if_t<!std::is_floating_point_v<T> && !std::is_integral_v<T>, void>
visit_max(std::ostream& os, const ExprPtr& lhs, const ExprPtr& rhs) {
os << "(" << *lhs << " < " << *rhs << ") ? " << *rhs << " : " << *lhs;
}

template <typename T>
inline std::
static inline std::
enable_if_t<std::is_floating_point_v<T> || std::is_integral_v<T>, void>
visit_min(std::ostream& os, const ExprPtr& lhs, const ExprPtr& rhs) {
os << "std::min(" << *lhs << ", " << *rhs << ")";
}

template <typename T>
inline std::
static inline std::
enable_if_t<!std::is_floating_point_v<T> && !std::is_integral_v<T>, void>
visit_min(std::ostream& os, const ExprPtr& lhs, const ExprPtr& rhs) {
os << *lhs << " < " << *rhs << " ? " << *lhs << " : " << *rhs;
}

template <typename T>
void visit_binary_op(
static void visit_binary_op(
std::ostream& os,
const ExprPtr& lhs,
const ExprPtr& rhs,
@ -142,7 +142,7 @@ void visit_binary_op(
}

template <typename Op>
void dispatch_binary_op(std::ostream& os, const BinaryOpNode<Op>* v) {
static void dispatch_binary_op(std::ostream& os, const BinaryOpNode<Op>* v) {
switch (v->lhs()->dtype().scalar_type()) {
#define TYPE_CASE(Type, Name) \
case ScalarType::Name: \
@ -400,6 +400,6 @@ void CppCodeGen::call_raw(const std::vector<void*>& args) {
os() << "int main() {}" << '\n';
}

RegisterCodeGen<CppCodeGen> cpp_codegen_reg("cpp_codegen");
static RegisterCodeGen<CppCodeGen> cpp_codegen_reg("cpp_codegen");

} // namespace torch::jit::tensorexpr

@ -10,7 +10,7 @@

namespace torch::jit::tensorexpr {

RegisterCodeGen<SimpleIREvaluator> ir_eval_codegen_reg("simple_ir_eval");
static RegisterCodeGen<SimpleIREvaluator> ir_eval_codegen_reg("simple_ir_eval");

int64_t InterpValue::intValue() const {
#define TYPE_CASE(Type, Name) \
@ -24,43 +24,42 @@ int64_t InterpValue::intValue() const {
}

template <typename T>
inline std::enable_if_t<std::is_integral_v<T>, T> mod_value(T lhs, T rhs) {
static inline std::enable_if_t<std::is_integral_v<T>, T> mod_value(
T lhs,
T rhs) {
return lhs % rhs;
}

template <typename T>
inline std::enable_if_t<std::is_floating_point_v<T>, T> mod_value(
static inline std::enable_if_t<std::is_floating_point_v<T>, T> mod_value(
T lhs,
T rhs) {
return std::fmod(lhs, rhs);
}

inline bool mod_value(bool lhs, bool rhs) {
static inline bool mod_value(bool lhs, bool rhs) {
throw std::runtime_error("Attempted modulus of bool");
}

template <typename T>
inline std::enable_if_t<std::is_integral_v<T>, T> div_value(T lhs, T rhs) {
static inline std::enable_if_t<std::is_integral_v<T>, T> div_value(
T lhs,
T rhs) {
TORCH_CHECK(rhs != 0, "Division by zero");
return lhs / rhs;
}

template <typename T>
inline std::enable_if_t<std::is_floating_point_v<T>, T>
static inline std::enable_if_t<std::is_floating_point_v<T>, T>
__ubsan_ignore_float_divide_by_zero__ div_value(T lhs, T rhs) {
return lhs / rhs;
}

inline bool div_value(bool lhs, bool rhs) {
LOG(FATAL) << "Attempted division of bool";
return false;
}

inline c10::Half div_value(c10::Half lhs, c10::Half rhs) {
static inline c10::Half div_value(c10::Half lhs, c10::Half rhs) {
return lhs / rhs;
}

inline c10::BFloat16 div_value(c10::BFloat16 lhs, c10::BFloat16 rhs) {
static inline c10::BFloat16 div_value(c10::BFloat16 lhs, c10::BFloat16 rhs) {
return lhs / rhs;
}

@ -60,7 +60,7 @@ template <
std::enable_if_t<std::is_same_v<
decltype(detail::bin_op_deducer(std::declval<Op>())),
void>>* = nullptr>
void visitBinaryOp(
static void visitBinaryOp(
NodePtr<Op> v,
const std::string& op_str,
IRPrinter* printer,

@ -8,7 +8,7 @@
namespace torch::jit::tensorexpr {

// Creates a new Expr of the given type with the provided lhs and rhs.
inline ExprPtr newBinaryOpOfType(
static inline ExprPtr newBinaryOpOfType(
IRNodeType expr_type,
const ExprPtr& lhs,
const ExprPtr& rhs,
@ -72,7 +72,7 @@ static ExprPtr mutateBinaryOp(

// Simple recursive GCD.
template <typename T>
T gcd(T a, T b) {
static T gcd(T a, T b) {
if (b == 0) {
return a;
}
@ -205,7 +205,7 @@ void MinTerm::uniquefy() {

// Handles optimization cases for Broadcast/Ramp +/- Broadcast/Ramp
template <class Op>
ExprPtr combineMultilane(const ExprPtr& lhs, const ExprPtr& rhs) {
static ExprPtr combineMultilane(const ExprPtr& lhs, const ExprPtr& rhs) {
if (BroadcastPtr bc = to<Broadcast>(lhs)) {
if (BroadcastPtr bcother = to<Broadcast>(rhs)) {
if (bc->lanes() != bcother->lanes()) {

@ -19,7 +19,7 @@ template <
std::enable_if_t<
std::is_same_v<decltype(detail::deducer(std::declval<D>())), void>>* =
nullptr>
void verifyBitwiseOp(NodePtr<D> v, IRVerifier* verifier) {
static void verifyBitwiseOp(NodePtr<D> v, IRVerifier* verifier) {
if (!v->lhs()->dtype().is_integral()) {
throw unsupported_dtype();
}

@ -1242,7 +1242,7 @@ NNCLoweringFunction TensorExprKernel::getCustomLoweringFor(
}

template <typename T>
std::vector<size_t> reverse_sort_indices(const std::vector<T>& v) {
static std::vector<size_t> reverse_sort_indices(const std::vector<T>& v) {
// initialize original index locations
std::vector<size_t> idx(v.size());
iota(idx.begin(), idx.end(), 0);

@ -60,7 +60,7 @@ static std::vector<std::vector<ForPtr>> GetAllPerfectlyNestedLoopNests(
}

template <typename T>
std::tuple<std::vector<T>, std::vector<int>> select_n_randomly(
static std::tuple<std::vector<T>, std::vector<int>> select_n_randomly(
std::vector<T>& objects,
int n,
std::default_random_engine& random_engine) {
@ -100,7 +100,7 @@ static void printHistory(int index, std::string message) {
}

template <typename T>
std::string join(std::vector<T> indices, char sep = ',') {
static std::string join(std::vector<T> indices, char sep = ',') {
std::string s;
for (const auto& index : indices) {
s += std::to_string(index) + sep;
@ -118,7 +118,7 @@ static std::string join(
return s;
}
template <typename T>
std::string indexOf(const std::vector<T>& objects, const T& object) {
static std::string indexOf(const std::vector<T>& objects, const T& object) {
return std::to_string(std::distance(
objects.begin(), std::find(objects.begin(), objects.end(), object)));
}

@ -15,6 +15,7 @@
#include <torch/csrc/jit/tensorexpr/loopnest.h>
#include <torch/csrc/jit/tensorexpr/lowerings.h>
#include <torch/csrc/jit/tensorexpr/reduction.h>
#include <torch/csrc/jit/tensorexpr/tensorexpr_init.h>

#include <utility>

@ -25,7 +26,7 @@ struct pybind11::detail::type_caster<torch::jit::tensorexpr::ArgValue>
namespace torch::jit {
using namespace torch::jit::tensorexpr;

ArgValue convertPyToArgValue(py::handle inp) {
static ArgValue convertPyToArgValue(py::handle inp) {
if (py::isinstance<BufHandle>(inp)) {
return py::cast<BufHandle>(inp);
} else if (py::isinstance<VarHandle>(inp)) {
@ -54,7 +55,7 @@ ArgValue convertPyToArgValue(py::handle inp) {
}
}

Dtype parsePythonDtype(py::handle obj) {
static Dtype parsePythonDtype(py::handle obj) {
if (THPDtype_Check(obj.ptr())) {
return Dtype(reinterpret_cast<THPDtype*>(obj.ptr())->scalar_type);
} else {

@ -86,5 +86,4 @@ std::string& getLTCForceFallback() {
return config;
}

// NOLINTEND(misc-use-internal-linkage)
} // namespace torch::lazy

@ -6,7 +6,6 @@
#include <torch/csrc/lazy/core/ir_metadata.h>

// Enables caching on for dynamic shapes (aka disable hash on shapes)
// NOLINTNEXTLINE(misc-use-internal-linkage)
// clang-format off
C10_DEFINE_bool(
ltc_enable_dynamic_shapes,

@ -4,7 +4,6 @@

#include <utility>

// NOLINTNEXTLINE(misc-use-internal-linkage)
C10_DEFINE_bool(
ltc_enable_symbolic_shapes,
false,

@ -1,7 +1,7 @@
#include <torch/csrc/lazy/core/config.h>
#include <torch/csrc/lazy/ts_backend/config.h>

// TODO(whc) unclear if this is useful, has only been tested as true
// NOLINTNEXTLINE(misc-use-internal-linkage)
C10_DEFINE_bool(
torch_lazy_ts_tensor_update_sync,
true,
@ -9,7 +9,6 @@ C10_DEFINE_bool(

// TODO(whc) we need to hook up these flags in a more useful way
// possibly also keep LTC_TS_CUDA env working?
// NOLINTNEXTLINE(misc-use-internal-linkage)
C10_DEFINE_bool(
torch_lazy_ts_cuda,
false,

@ -8,10 +8,10 @@
#include <libshm/libshm.h>
#include <libshm/socket.h>

std::unordered_map<std::string, ClientSocket> managers;
std::string manager_executable_path;
static std::unordered_map<std::string, ClientSocket> managers;
static std::string manager_executable_path;

AllocInfo get_alloc_info(const char* filename) {
static AllocInfo get_alloc_info(const char* filename) {
AllocInfo info = {};
info.pid = getpid();
info.free = false;
@ -23,7 +23,7 @@ AllocInfo get_alloc_info(const char* filename) {
return info;
}

void start_manager() {
static void start_manager() {
std::array<int, 2> pipe_ends;
SYSCHECK_ERR_RETURN_NEG1(pipe(pipe_ends.data()));

@ -78,7 +78,7 @@ void start_manager() {
managers.emplace(std::move(handle), std::move(manager));
}

ClientSocket& get_manager_socket(const std::string& manager_handle) {
static ClientSocket& get_manager_socket(const std::string& manager_handle) {
auto it = managers.find(manager_handle);
if (it == managers.end()) {
auto socket = ClientSocket(manager_handle);

@ -32,19 +32,19 @@ struct ClientSession {
pid_t pid;
};

std::vector<struct pollfd> pollfds;
std::unordered_map<int, ClientSession> client_sessions;
static std::vector<struct pollfd> pollfds;
static std::unordered_map<int, ClientSession> client_sessions;
// TODO: check if objects have been freed from time to time
std::set<std::string> used_objects;
static std::set<std::string> used_objects;

void register_fd(int fd) {
static void register_fd(int fd) {
struct pollfd pfd = {};
pfd.fd = fd;
pfd.events = POLLIN;
pollfds.push_back(pfd);
}

void unregister_fd(int fd) {
static void unregister_fd(int fd) {
pollfds.erase(
std::remove_if(
pollfds.begin(),
@ -54,7 +54,7 @@ void unregister_fd(int fd) {
client_sessions.erase(fd);
}

void print_init_message(std::string_view message) {
static void print_init_message(std::string_view message) {
ssize_t written_bytes = -1;
while (!message.empty()) {
// NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
@ -69,7 +69,7 @@ void print_init_message(std::string_view message) {
}
}

bool object_exists(const char* name) {
static bool object_exists(const char* name) {
int fd = shm_open(name, O_RDONLY, 0);
if (fd >= 0) {
close(fd);
@ -79,7 +79,7 @@ bool object_exists(const char* name) {
}
}

void free_used_object(const std::string& name) {
static void free_used_object(const std::string& name) {
if (!object_exists(name.c_str())) {
DEBUG("object %s appears to have been freed", name.c_str());
used_objects.erase(name);