diff --git a/c10/util/FbcodeMaps.h b/c10/util/FbcodeMaps.h
new file mode 100644
index 000000000000..9832cf36d5c5
--- /dev/null
+++ b/c10/util/FbcodeMaps.h
@@ -0,0 +1,29 @@
+#ifndef C10_UTIL_FBCODEMAPS_H_
+#define C10_UTIL_FBCODEMAPS_H_
+
+// Map typedefs so that we can use folly's F14 maps in fbcode without
+// taking a folly dependency.
+
+#ifdef FBCODE_CAFFE2
+#include <folly/container/F14Map.h>
+#include <folly/container/F14Set.h>
+#else
+#include <unordered_map>
+#include <unordered_set>
+#endif
+
+namespace c10 {
+#ifdef FBCODE_CAFFE2
+template <typename Key, typename Value>
+using FastMap = folly::F14FastMap<Key, Value>;
+template <typename Key>
+using FastSet = folly::F14FastSet<Key>;
+#else
+template <typename Key, typename Value>
+using FastMap = std::unordered_map<Key, Value>;
+template <typename Key>
+using FastSet = std::unordered_set<Key>;
+#endif
+} // namespace c10
+
+#endif // C10_UTIL_FBCODEMAPS_H_
diff --git a/torch/csrc/jit/runtime/static/impl.cpp b/torch/csrc/jit/runtime/static/impl.cpp
index 1530ca5eebcc..e28725ab8e0d 100644
--- a/torch/csrc/jit/runtime/static/impl.cpp
+++ b/torch/csrc/jit/runtime/static/impl.cpp
@@ -133,7 +133,7 @@ auto sr_metadata_registerer = torch::class_<StaticRuntimeMetadata>(
 } // namespace
 
 std::string dumpValueSet(
-    const FastSet<const Value*>& value_set,
+    const c10::FastSet<const Value*>& value_set,
     const char* set_name) {
   std::ostringstream oss;
   oss << set_name << ": {";
@@ -229,7 +229,7 @@ bool removeSelfFromGraphInput(std::shared_ptr<Graph>& graph) {
   return true;
 }
 
-std::vector<const Value*> valueVecFromFastSet(const FastSet<const Value*>& s) {
+std::vector<const Value*> valueVecFromFastSet(const c10::FastSet<const Value*>& s) {
   std::vector<const Value*> result;
   result.reserve(s.size());
   for (auto* v : s) {
@@ -248,7 +248,7 @@ bool mayContainAlias(const AliasDb& db, const Value* v1, const Value* v2) {
 bool mayContainAlias(
     const AliasDb& db,
     const Value* a,
-    const FastSet<const Value*>& b) {
+    const c10::FastSet<const Value*>& b) {
   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
   return db.mayContainAlias(const_cast<Value*>(a), valueVecFromFastSet(b));
 }
@@ -390,9 +390,9 @@ bool isPureFunction(const Node* node) {
 ManagedTensorRanges::ManagedTensorRanges(
     Block& block,
     const AliasDb& alias_db,
-    const FastSet<const Value*>& managed_tensor_values) {
+    const c10::FastSet<const Value*>& managed_tensor_values) {
   const std::vector<Node*> nodes(block.nodes().begin(), block.nodes().end());
-  const FastSet<const Value*> graph_inputs(
+  const c10::FastSet<const Value*> graph_inputs(
       block.inputs().begin(), block.inputs().end());
 
   const auto num_nodes = nodes.size();
@@ -589,7 +589,7 @@ StaticModule::StaticModule(
 
   // Maps each Value* in the graph to its index in the values_ array that will
   // eventually be created by StaticRuntime.
-  FastMap<const Value*, uint32_t> value_to_index;
+  c10::FastMap<const Value*, uint32_t> value_to_index;
   prepareFunctionsAndConstants(graph_->block(), alias_db, value_to_index);
 
   const auto constants_index_offset = 0;
@@ -610,7 +610,7 @@ StaticModule::StaticModule(
 size_t StaticModule::prepareBlockInfo(
     Block* block,
     const size_t start_idx,
-    FastMap<const Value*, uint32_t>& value_to_index) {
+    c10::FastMap<const Value*, uint32_t>& value_to_index) {
   block_infos_.emplace(block, BlockInfo(start_idx, *block));
 
   const auto num_inputs = block->inputs().size();
@@ -671,7 +671,7 @@ void StaticModule::attachNodeMetadata(Block* block) {
 void StaticModule::prepareFunctionsAndConstants(
     Block* block,
     const AliasDb& alias_db,
-    FastMap<const Value*, uint32_t>& value_to_index) {
+    c10::FastMap<const Value*, uint32_t>& value_to_index) {
   for (auto* node : block->nodes()) {
     for (auto* sub_block : node->blocks()) {
       prepareFunctionsAndConstants(sub_block, alias_db, value_to_index);
@@ -702,14 +702,14 @@ void StaticModule::prepareFunctionsAndConstants(
 
 size_t StaticModule::prepareStaticNodeInfos(
     Block* block,
-    const FastMap<const Value*, uint32_t>& value_to_index,
+    const c10::FastMap<const Value*, uint32_t>& value_to_index,
     const AliasDb& alias_db,
     size_t node_idx) {
   const auto node_start = node_idx;
 
   auto& block_info = block_infos_.at(block);
   std::vector<StaticNodeInfo> nodes;
-  FastMap<Node*, bool> node_has_out_variant;
+  c10::FastMap<Node*, bool> node_has_out_variant;
 
   for (auto* node : block->nodes()) {
     if (node->kind() == prim::Constant) {
@@ -754,7 +754,7 @@ size_t StaticModule::prepareStaticNodeInfos(
 
 void BlockInfo::set_nodes(
     std::vector<StaticNodeInfo> nodes,
-    const FastMap<Node*, bool>& node_has_out_variant) {
+    const c10::FastMap<Node*, bool>& node_has_out_variant) {
   nodes_ = std::move(nodes);
 
   for (auto& node : nodes_) {
@@ -773,7 +773,7 @@ void BlockInfo::prepare_for_memory_planner(
 
   // Never manage graph outputs so that we can do std::move(output_ivalue).
   // This does not affect performance if the graph returns a collection object.
-  FastSet<const Value*> graph_output_values(
+  c10::FastSet<const Value*> graph_output_values(
       block_.outputs().begin(), block_.outputs().end());
 
   // collect register indices of outputs of ops with out variant
@@ -1796,7 +1796,7 @@ bool BlockRunner::check_for_memory_leak(
         i,
         " was not cleaned up");
   }
-  FastSet<const IValue*> output_ivalues(outputs_.begin(), outputs_.end());
+  c10::FastSet<const IValue*> output_ivalues(outputs_.begin(), outputs_.end());
   for (const auto n : c10::irange(nodes_.size())) {
     auto& pnode = nodes_[n];
     for (const auto i : c10::irange(pnode.num_outputs())) {
diff --git a/torch/csrc/jit/runtime/static/impl.h b/torch/csrc/jit/runtime/static/impl.h
index 5344a1a4bbe2..a9276e2309bd 100644
--- a/torch/csrc/jit/runtime/static/impl.h
+++ b/torch/csrc/jit/runtime/static/impl.h
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include <c10/util/FbcodeMaps.h>
 #include
 #include
 #include
@@ -24,23 +25,11 @@
 namespace torch {
 namespace jit {
 
-#ifdef FBCODE_CAFFE2
-template <typename Key, typename Value>
-using FastMap = folly::F14FastMap<Key, Value>;
-template <typename Key>
-using FastSet = folly::F14FastSet<Key>;
-#else
-template <typename Key, typename Value>
-using FastMap = std::unordered_map<Key, Value>;
-template <typename Key>
-using FastSet = std::unordered_set<Key>;
-#endif
-
 TORCH_API bool canEnableStaticRuntime(
     const std::shared_ptr<torch::jit::Graph>& graph);
 
 TORCH_API std::string dumpValueSet(
-    const FastSet<const Value*>& value_set,
+    const c10::FastSet<const Value*>& value_set,
     const char* set_name = "");
 
 TORCH_API inline bool doesNotHeapAllocateWhenStoredInIValue(const Type& type) {
@@ -111,8 +100,8 @@ class ValueGroup {
   }
 
  private:
-  FastSet<const Value*> output_aliases_;
-  FastSet<const Value*> external_aliases_;
+  c10::FastSet<const Value*> output_aliases_;
+  c10::FastSet<const Value*> external_aliases_;
 };
 
 class TORCH_API ManagedTensorRanges {
@@ -121,7 +110,7 @@ class TORCH_API ManagedTensorRanges {
   ManagedTensorRanges(
       Block& block,
       const AliasDb& alias_db,
-      const FastSet<const Value*>& managed_tensor_values);
+      const c10::FastSet<const Value*>& managed_tensor_values);
 
   // If true, then this node is the last use of at least one
   // managed tensor. availableTensorValuesAfterNode(node) will return a vector
@@ -154,9 +143,9 @@ class TORCH_API ManagedTensorRanges {
   // Maps Node* to the set of managed tensors that are now available
   // for re-use after this node.
-  FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_{};
+  c10::FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_{};
   // Maps each Value* to its lifetime (start node index, end node index)
-  FastMap<const Value*, Lifetime> value_lifetimes_{};
+  c10::FastMap<const Value*, Lifetime> value_lifetimes_{};
 };
 
 struct TORCH_API StaticModuleOptions {
@@ -277,7 +266,7 @@ class BlockInfo {
 
   void set_nodes(
       std::vector<StaticNodeInfo> nodes,
-      const FastMap<Node*, bool>& node_has_out_variant);
+      const c10::FastMap<Node*, bool>& node_has_out_variant);
 
   const std::vector<StaticNodeInfo>& nodes() const {
     return nodes_;
@@ -357,10 +346,10 @@ class BlockInfo {
 
   ValueGroup value_group_;
 
-  FastSet<const Node*> node_is_optimizable_container_type_;
-  FastSet<const Value*> managed_tensor_values_;
-  FastSet<const Value*> managed_output_tensor_values_;
-  FastSet<const Value*> leaked_values_;
+  c10::FastSet<const Node*> node_is_optimizable_container_type_;
+  c10::FastSet<const Value*> managed_tensor_values_;
+  c10::FastSet<const Value*> managed_output_tensor_values_;
+  c10::FastSet<const Value*> leaked_values_;
 
   ManagedTensorRanges managed_tensor_ranges_{};
 
@@ -481,12 +470,12 @@ class TORCH_API StaticModule {
   size_t prepareBlockInfo(
       Block* block,
       const size_t start_idx,
-      FastMap<const Value*, uint32_t>& value_to_index);
+      c10::FastMap<const Value*, uint32_t>& value_to_index);
 
   void prepareFunctionsAndConstants(
       Block* block,
       const AliasDb& alias_db,
-      FastMap<const Value*, uint32_t>& value_to_index);
+      c10::FastMap<const Value*, uint32_t>& value_to_index);
 
   // Recursively traverse the graph and attach SR metadata
   // to the prim::fork nodes as additional attributes
@@ -496,7 +485,7 @@ class TORCH_API StaticModule {
   // Returns (number of nodes processed, number of blocks processed)
   size_t prepareStaticNodeInfos(
       Block* block,
-      const FastMap<const Value*, uint32_t>& value_to_index,
+      const c10::FastMap<const Value*, uint32_t>& value_to_index,
       const AliasDb& alias_db,
       size_t node_idx = 0);
 
@@ -531,7 +520,7 @@ class TORCH_API StaticModule {
   // includes it anyways to be consistent with the JIT interpreter.
   size_t num_inputs_;
   // See `BlockInfo` definition. The blocks are stored in depth-first order.
-  FastMap<Block*, BlockInfo> block_infos_;
+  c10::FastMap<Block*, BlockInfo> block_infos_;
   size_t value_buffer_size_ = 0;
 };
diff --git a/torch/csrc/jit/runtime/static/memory_planner.cpp b/torch/csrc/jit/runtime/static/memory_planner.cpp
index e8b0fb6a3840..d91e3563c873 100644
--- a/torch/csrc/jit/runtime/static/memory_planner.cpp
+++ b/torch/csrc/jit/runtime/static/memory_planner.cpp
@@ -24,10 +24,10 @@ bool isUnmanagedSpecialCase(const ProcessedNode& pnode, size_t output_idx) {
       pnode.Output(output_idx).isNone();
 }
 
-FastMap<const Value*, at::Tensor*> tensorValueToTensor(
+c10::FastMap<const Value*, at::Tensor*> tensorValueToTensor(
     const std::vector<ProcessedNode>& nodes,
-    const FastSet<const Value*>& managed_tensor_values) {
-  FastMap<const Value*, at::Tensor*> tensor_value_to_tensor;
+    const c10::FastSet<const Value*>& managed_tensor_values) {
+  c10::FastMap<const Value*, at::Tensor*> tensor_value_to_tensor;
   for (auto& pnode : nodes) {
     auto* node = pnode.node();
     for (const auto output_idx : c10::irange(node->outputs().size())) {
@@ -72,10 +72,10 @@ at::DataPtr allocate_buffer(size_t size) {
 std::vector<StorageGroup> assignStorageToManagedTensors(
     graph_node_list nodes,
     const ManagedTensorRanges& ranges,
-    const FastMap<const Value*, at::Tensor*>& tensor_value_to_tensor) {
+    const c10::FastMap<const Value*, at::Tensor*>& tensor_value_to_tensor) {
   std::vector<StorageGroup> managed_tensor_groups;
   // This set maps each Value* to its assigned storage group.
-  FastMap<const Value*, size_t> storage_group_mapping;
+  c10::FastMap<const Value*, size_t> storage_group_mapping;
   // On each iteration, this vector stores the set of storage groups that
   // are available for re-use.
   std::vector<size_t> free_storage_groups;
@@ -137,13 +137,13 @@ std::vector<StorageGroup> assignStorageToManagedTensors(
 
 namespace {
 
-bool setIncludes(const FastSet<const Value*>& set, const Value* v) {
+bool setIncludes(const c10::FastSet<const Value*>& set, const Value* v) {
   return set.find(v) != set.end();
 }
 
 std::vector<std::pair<size_t, at::Tensor*>> assignStorageToOutputTensors(
     BlockRunner* block_runner,
-    const FastSet<const Value*>& managed_output_tensor_values) {
+    const c10::FastSet<const Value*>& managed_output_tensor_values) {
   std::vector<std::pair<size_t, at::Tensor*>> managed_output_tensors;
   for (auto& pnode : block_runner->nodes()) {
     for (const auto i : c10::irange(pnode.outputs().size())) {
@@ -174,8 +174,8 @@ MemoryPlanner::MemoryPlanner(
   const auto& leaked_values = block_info.leaked_values();
 
   // collect unmanaged output ivalues
-  FastSet<IValue*> unmanaged_ivalues;
-  FastSet<IValue*> unmanaged_borrowed_ivalues;
+  c10::FastSet<IValue*> unmanaged_ivalues;
+  c10::FastSet<IValue*> unmanaged_borrowed_ivalues;
   for (ProcessedNode& pnode : block_runner->nodes()) {
     const auto borrows_outputs = borrowsOutputs(pnode.node()->kind());
     for (const auto i : c10::irange(pnode.outputs().size())) {
diff --git a/torch/csrc/jit/runtime/static/memory_planner.h b/torch/csrc/jit/runtime/static/memory_planner.h
index 3f849b4daf7b..f5c7a153d6ae 100644
--- a/torch/csrc/jit/runtime/static/memory_planner.h
+++ b/torch/csrc/jit/runtime/static/memory_planner.h
@@ -42,7 +42,7 @@ class StorageGroup {
 TORCH_API std::vector<StorageGroup> assignStorageToManagedTensors(
     graph_node_list nodes,
     const ManagedTensorRanges& ranges,
-    const FastMap<const Value*, at::Tensor*>& tensor_value_to_tensor);
+    const c10::FastMap<const Value*, at::Tensor*>& tensor_value_to_tensor);
 
 // There are three types of ops in a processed graph in Static Runtime:
 //   1. op with _out variant
diff --git a/torch/csrc/jit/runtime/static/ops.cpp b/torch/csrc/jit/runtime/static/ops.cpp
index 679b28a822bc..6f4a5d76f3ae 100644
--- a/torch/csrc/jit/runtime/static/ops.cpp
+++ b/torch/csrc/jit/runtime/static/ops.cpp
@@ -393,7 +393,7 @@ bool disableUnsafeMathOp(const char* op_name) {
   // not guarantee bit exactness vs the jit interpreter. Note aten::relu is not
   // included even though it uses NNC because the results of relu should always
   // match.
-  static const FastSet<std::string> fast_ops{
+  static const c10::FastSet<std::string> fast_ops{
       "aten::add", "aten::tanh", "aten::sigmoid", "aten::logit"};
   return fast_ops.count(op_name) > 0;
 }
@@ -417,7 +417,7 @@ bool hasVarArgs(Node* n) {
 
 bool canReuseInputsOutputs(
     Node* n,
-    const FastMap<Node*, bool>& node_has_out_variant) {
+    const c10::FastMap<Node*, bool>& node_has_out_variant) {
   auto it = node_has_out_variant.find(n);
   if (it != node_has_out_variant.end()) {
     return it->second;
@@ -430,7 +430,7 @@ bool canReuseInputsOutputs(
 // This means the IValues will not change run to run
 bool inputsCanRunOutOfPlace(
     Node* n,
-    const FastMap<Node*, bool>& node_has_out_variant) {
+    const c10::FastMap<Node*, bool>& node_has_out_variant) {
   for (auto* input : n->inputs()) {
     if (!canReuseInputsOutputs(input->node(), node_has_out_variant)) {
       return false;
@@ -441,7 +441,7 @@ bool inputsCanRunOutOfPlace(
 
 bool isOptimizableContainerType(
     Node* n,
-    const FastMap<Node*, bool>& node_has_out_variant) {
+    const c10::FastMap<Node*, bool>& node_has_out_variant) {
   const auto& type = n->output()->type();
   bool is_supported_type = false;
   if (type->kind() == TypeKind::ListType) {
@@ -488,7 +488,7 @@ REGISTER_OPERATOR_FUNCTOR(
         return nullptr;
       }
       const bool can_optimize =
-          isOptimizableContainerType(n, FastMap<Node*, bool>());
+          isOptimizableContainerType(n, c10::FastMap<Node*, bool>());
       const auto& type = n->output()->type()->expectRef<ListType>();
       const size_t size = n->inputs().size();
       if (!can_optimize) {
@@ -543,7 +543,7 @@ REGISTER_OPERATOR_FUNCTOR(
         return nullptr;
       }
       const bool can_optimize =
-          isOptimizableContainerType(n, FastMap<Node*, bool>());
+          isOptimizableContainerType(n, c10::FastMap<Node*, bool>());
       const size_t size = n->inputs().size();
       if (!can_optimize) {
         return [size](ProcessedNode* p_node) {
diff --git a/torch/csrc/jit/runtime/static/ops.h b/torch/csrc/jit/runtime/static/ops.h
index 8b993e87fb35..7615233df56d 100644
--- a/torch/csrc/jit/runtime/static/ops.h
+++ b/torch/csrc/jit/runtime/static/ops.h
@@ -148,10 +148,10 @@ bool nativeOpIsRegistered(const c10::Symbol& op_name);
 
 bool canReuseInputsOutputs(
     Node* n,
-    const FastMap<Node*, bool>& node_has_out_variant);
+    const c10::FastMap<Node*, bool>& node_has_out_variant);
 
 bool isOptimizableContainerType(
     Node* n,
-    const FastMap<Node*, bool>& node_has_out_variant);
+    const c10::FastMap<Node*, bool>& node_has_out_variant);
 
 SROperator getOutOfPlaceOperation(Node* n);
 SROperator getNativeOperation(Node* n);
diff --git a/torch/csrc/jit/runtime/static/passes.cpp b/torch/csrc/jit/runtime/static/passes.cpp
index a3875e09650f..970a5a20d826 100644
--- a/torch/csrc/jit/runtime/static/passes.cpp
+++ b/torch/csrc/jit/runtime/static/passes.cpp
@@ -668,7 +668,7 @@ void ReplaceWithMaybeCopy(
 
 void ReplaceWithCopyImpl(
     std::shared_ptr<Graph>& graph,
-    const FastMap<c10::Symbol, c10::Symbol>& supported,
+    const c10::FastMap<c10::Symbol, c10::Symbol>& supported,
     const std::vector<std::pair<c10::FunctionSchema, c10::FunctionSchema>>&
         supported_schema,
     const std::function<bool(Node*)>& f_extra_checks,
@@ -755,7 +755,7 @@ void ReplacePermuteWithCopy(
     std::shared_ptr<Graph>& graph,
     bool outputs_are_immutable) {
   AliasDb db(graph);
-  const FastMap<c10::Symbol, c10::Symbol> supported = {
+  const c10::FastMap<c10::Symbol, c10::Symbol> supported = {
 #ifdef FBCODE_CAFFE2
       OP_PAIR("aten::permute", "static_runtime::permute_copy"),
 #endif
@@ -777,7 +777,7 @@ void ReplaceWithCopy(
     std::shared_ptr<Graph>& graph,
     bool outputs_are_immutable) {
   AliasDb db(graph);
-  const FastMap<c10::Symbol, c10::Symbol> supported = {
+  const c10::FastMap<c10::Symbol, c10::Symbol> supported = {
 #ifdef FBCODE_CAFFE2
       OP_PAIR("aten::permute", "static_runtime::permute_copy"),
       OP_PAIR("fb::expand_dims", "static_runtime::expand_dims_copy"),
 #endif
@@ -868,7 +868,7 @@ bool shouldNotFuseListUnpackSpecialCase(const Node* node) {
 } // namespace
 
 void FuseListUnpack(std::shared_ptr<Graph>& graph) {
-  const FastMap<c10::Symbol, c10::Symbol> unfused_to_fused = {
+  const c10::FastMap<c10::Symbol, c10::Symbol> unfused_to_fused = {
       OP_PAIR(
           "torcharrow::inference_wrapper_run_flat",
           "static_runtime::fused_inference_wrapper_run_flat"),
@@ -1045,7 +1045,8 @@ void CreateOwnedRefsForSpecialValuesHelper(Graph& graph, Block* block) {
   auto outputs = block->outputs();
   // Create owned refs for inputs. Otherwise, the input cleanup process
   // will destroy our outputs before we return.
-  FastSet<Value*> inputs = {block->inputs().begin(), block->inputs().end()};
+  c10::FastSet<Value*> inputs = {
+      block->inputs().begin(), block->inputs().end()};
 
   for (const auto i : c10::irange(outputs.size())) {
     auto* output = outputs[i];
diff --git a/torch/csrc/jit/runtime/static/te_wrapper.cpp b/torch/csrc/jit/runtime/static/te_wrapper.cpp
index 35bef91ea70a..73039ffdead6 100644
--- a/torch/csrc/jit/runtime/static/te_wrapper.cpp
+++ b/torch/csrc/jit/runtime/static/te_wrapper.cpp
@@ -93,8 +93,8 @@ std::mutex& getNNCCacheMutex() {
   return nncCacheMutex;
 }
 
-FastMap<NodeKind, std::shared_ptr<TEWrapper>>& getNNCCache() {
-  static FastMap<NodeKind, std::shared_ptr<TEWrapper>> nncCache;
+c10::FastMap<NodeKind, std::shared_ptr<TEWrapper>>& getNNCCache() {
+  static c10::FastMap<NodeKind, std::shared_ptr<TEWrapper>> nncCache;
   return nncCache;
 }
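
Not part of the patch: a minimal standalone sketch of how the relocated c10::FastMap / c10::FastSet aliases are consumed after this change. The same spelling resolves to folly::F14FastMap/F14FastSet under FBCODE_CAFFE2 and to std::unordered_map/std::unordered_set otherwise; the operator names below are illustrative only, not taken from the patch.

#include <c10/util/FbcodeMaps.h>

#include <iostream>
#include <string>

int main() {
  // Call sites spell the type the same way in OSS and fbcode builds;
  // only the underlying container differs.
  c10::FastMap<std::string, int> op_counts;
  ++op_counts["aten::add"];
  ++op_counts["aten::tanh"];

  c10::FastSet<std::string> fast_ops{"aten::add", "aten::tanh"};
  std::cout << op_counts["aten::add"] << " "
            << fast_ops.count("aten::sigmoid") << "\n"; // prints "1 0"
  return 0;
}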