mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-05 08:24:57 +08:00
Register CUDAAllocatorConfig to AcceleratorAllocatorConfig (#165131)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/165131 Approved by: https://github.com/Skylion007 ghstack dependencies: #165129
This commit is contained in:
committed by
PyTorch MergeBot
parent
7ee45f7503
commit
03e5dbb26e
@ -297,7 +297,7 @@ size_t CUDAAllocatorConfig::parseAllocatorConfig(
|
|||||||
#endif // USE_ROCM
|
#endif // USE_ROCM
|
||||||
}
|
}
|
||||||
|
|
||||||
void CUDAAllocatorConfig::parseArgs(const std::optional<std::string>& env) {
|
void CUDAAllocatorConfig::parseArgs(const std::string& env) {
|
||||||
// If empty, set the default values
|
// If empty, set the default values
|
||||||
m_max_split_size = std::numeric_limits<size_t>::max();
|
m_max_split_size = std::numeric_limits<size_t>::max();
|
||||||
m_roundup_power2_divisions.assign(kRoundUpPowerOfTwoIntervals, 0);
|
m_roundup_power2_divisions.assign(kRoundUpPowerOfTwoIntervals, 0);
|
||||||
@ -305,16 +305,13 @@ void CUDAAllocatorConfig::parseArgs(const std::optional<std::string>& env) {
|
|||||||
bool used_cudaMallocAsync = false;
|
bool used_cudaMallocAsync = false;
|
||||||
bool used_native_specific_option = false;
|
bool used_native_specific_option = false;
|
||||||
|
|
||||||
if (!env.has_value()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(m_last_allocator_settings_mutex);
|
std::lock_guard<std::mutex> lock(m_last_allocator_settings_mutex);
|
||||||
m_last_allocator_settings = env.value();
|
m_last_allocator_settings = env;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> config;
|
std::vector<std::string> config;
|
||||||
lexArgs(env.value(), config);
|
lexArgs(env, config);
|
||||||
|
|
||||||
for (size_t i = 0; i < config.size(); i++) {
|
for (size_t i = 0; i < config.size(); i++) {
|
||||||
std::string_view config_item_view(config[i]);
|
std::string_view config_item_view(config[i]);
|
||||||
@ -487,9 +484,6 @@ size_t CUDAAllocatorConfig::parsePinnedUseBackgroundThreads(
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
// General caching allocator utilities
|
REGISTER_ALLOCATOR_CONFIG_PARSE_HOOK(CUDAAllocatorConfig)
|
||||||
void setAllocatorSettings(const std::string& env) {
|
|
||||||
CUDACachingAllocator::CUDAAllocatorConfig::instance().parseArgs(env.c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace c10::cuda::CUDACachingAllocator
|
} // namespace c10::cuda::CUDACachingAllocator
|
||||||
|
|||||||
@ -1,16 +1,10 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <c10/core/AllocatorConfig.h>
|
||||||
#include <c10/cuda/CUDAMacros.h>
|
#include <c10/cuda/CUDAMacros.h>
|
||||||
#include <c10/util/Exception.h>
|
#include <c10/util/Exception.h>
|
||||||
#include <c10/util/env.h>
|
#include <c10/util/env.h>
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <cstddef>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <mutex>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
namespace c10::cuda::CUDACachingAllocator {
|
namespace c10::cuda::CUDACachingAllocator {
|
||||||
|
|
||||||
enum class Expandable_Segments_Handle_Type : int {
|
enum class Expandable_Segments_Handle_Type : int {
|
||||||
@ -111,13 +105,40 @@ class C10_CUDA_API CUDAAllocatorConfig {
|
|||||||
env = c10::utils::get_env("PYTORCH_HIP_ALLOC_CONF");
|
env = c10::utils::get_env("PYTORCH_HIP_ALLOC_CONF");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
inst->parseArgs(env);
|
// Note: keep the parsing order and logic stable to avoid potential
|
||||||
|
// performance regressions in internal tests.
|
||||||
|
if (!env.has_value()) {
|
||||||
|
env = c10::utils::get_env("PYTORCH_ALLOC_CONF");
|
||||||
|
}
|
||||||
|
if (env.has_value()) {
|
||||||
|
inst->parseArgs(env.value());
|
||||||
|
}
|
||||||
return inst;
|
return inst;
|
||||||
})();
|
})();
|
||||||
return *s_instance;
|
return *s_instance;
|
||||||
}
|
}
|
||||||
|
|
||||||
void parseArgs(const std::optional<std::string>& env);
|
// Use `Construct On First Use Idiom` to avoid `Static Initialization Order`
|
||||||
|
// issue.
|
||||||
|
static const std::unordered_set<std::string>& getKeys() {
|
||||||
|
static std::unordered_set<std::string> keys{
|
||||||
|
"backend",
|
||||||
|
// keep BC for Rocm: `cuda` -> `cud` `a`, to avoid hipify issues
|
||||||
|
// NOLINTBEGIN(bugprone-suspicious-missing-comma,-warnings-as-errors)
|
||||||
|
"release_lock_on_cud"
|
||||||
|
"amalloc",
|
||||||
|
"pinned_use_cud"
|
||||||
|
"a_host_register",
|
||||||
|
// NOLINTEND(bugprone-suspicious-missing-comma,-warnings-as-errors)
|
||||||
|
"release_lock_on_hipmalloc",
|
||||||
|
"pinned_use_hip_host_register",
|
||||||
|
"graph_capture_record_stream_reuse",
|
||||||
|
"pinned_reserve_segment_size_mb",
|
||||||
|
"pinned_num_register_threads"};
|
||||||
|
return keys;
|
||||||
|
}
|
||||||
|
|
||||||
|
void parseArgs(const std::string& env);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CUDAAllocatorConfig();
|
CUDAAllocatorConfig();
|
||||||
@ -174,7 +195,7 @@ class C10_CUDA_API CUDAAllocatorConfig {
|
|||||||
std::mutex m_last_allocator_settings_mutex;
|
std::mutex m_last_allocator_settings_mutex;
|
||||||
};
|
};
|
||||||
|
|
||||||
// General caching allocator utilities
|
// Keep this for backwards compatibility
|
||||||
C10_CUDA_API void setAllocatorSettings(const std::string& env);
|
using c10::CachingAllocator::setAllocatorSettings;
|
||||||
|
|
||||||
} // namespace c10::cuda::CUDACachingAllocator
|
} // namespace c10::cuda::CUDACachingAllocator
|
||||||
|
|||||||
@ -64,10 +64,6 @@ namespace cuda::CUDACachingAllocator {
|
|||||||
using namespace c10::CachingAllocator;
|
using namespace c10::CachingAllocator;
|
||||||
using namespace c10::CachingDeviceAllocator;
|
using namespace c10::CachingDeviceAllocator;
|
||||||
|
|
||||||
// Included here as this is externally used in CUDAAllocatorConfig
|
|
||||||
const size_t kLargeBuffer =
|
|
||||||
20971520; // "large" allocations may be packed in 20 MiB blocks
|
|
||||||
|
|
||||||
namespace Native {
|
namespace Native {
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <c10/core/AllocatorConfig.h>
|
||||||
#include <c10/core/CachingDeviceAllocator.h>
|
#include <c10/core/CachingDeviceAllocator.h>
|
||||||
#include <c10/cuda/CUDAGraphsC10Utils.h>
|
#include <c10/cuda/CUDAGraphsC10Utils.h>
|
||||||
#include <c10/cuda/CUDAMacros.h>
|
#include <c10/cuda/CUDAMacros.h>
|
||||||
@ -49,10 +50,9 @@ namespace c10::cuda::CUDACachingAllocator {
|
|||||||
|
|
||||||
// Preserved only for BC reasons
|
// Preserved only for BC reasons
|
||||||
// NOLINTNEXTLINE(misc-unused-using-decls)
|
// NOLINTNEXTLINE(misc-unused-using-decls)
|
||||||
|
using c10::CachingAllocator::kLargeBuffer;
|
||||||
using c10::CachingDeviceAllocator::DeviceStats;
|
using c10::CachingDeviceAllocator::DeviceStats;
|
||||||
|
|
||||||
extern const size_t kLargeBuffer;
|
|
||||||
|
|
||||||
typedef std::shared_ptr<GatheredContext> (*CreateContextFn)();
|
typedef std::shared_ptr<GatheredContext> (*CreateContextFn)();
|
||||||
|
|
||||||
// Struct containing info of an allocation block (i.e. a fractional part of a
|
// Struct containing info of an allocation block (i.e. a fractional part of a
|
||||||
|
|||||||
Reference in New Issue
Block a user