mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Revert "Introduce AcceleratorAllocatorConfig as the common class (#149601)"
This reverts commit 55108074c0795be3b617d3b13b06794f63e1f8ca. Reverted https://github.com/pytorch/pytorch/pull/149601 on behalf of https://github.com/facebook-github-bot due to Diff reverted internally ([comment](https://github.com/pytorch/pytorch/pull/149601#issuecomment-3050628047))
This commit is contained in:
@ -1,232 +0,0 @@
|
||||
#include <c10/core/AllocatorConfig.h>
|
||||
#include <c10/core/DeviceType.h>
|
||||
#include <c10/util/env.h>
|
||||
#include <c10/util/irange.h>
|
||||
|
||||
namespace c10::CachingAllocator {
|
||||
|
||||
namespace {
// Number of configurable power-of-two rounding intervals (buckets).
constexpr size_t kRoundUpPowerOfTwoIntervals = 16;
// One mebibyte; MB-denominated config knobs are converted to bytes with this.
constexpr size_t kMB = 1024 * 1024ul;
// First rounding interval boundary.
constexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB
// Last rounding interval boundary; log2(64GB) - log2(1MB) == 16 intervals.
constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB
} // anonymous namespace
|
||||
|
||||
// Returns the process-wide singleton. On first call, lazily parses the
// allocator configuration from the environment exactly once via the
// function-local `env_flag` initializer (thread-safe since C++11).
AcceleratorAllocatorConfig& AcceleratorAllocatorConfig::instance() {
  static AcceleratorAllocatorConfig instance;
// Expands to: read env var `env`; if it is set, optionally warn that it is
// deprecated, parse it, and `return true` from the enclosing lambda. The
// early return means only the FIRST variable found below is honored.
#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated)                       \
  auto env##_name = c10::utils::get_env(#env);                                \
  if (env##_name.has_value()) {                                               \
    if (deprecated) {                                                         \
      TORCH_WARN_ONCE(#env " is deprecated, use PYTORCH_ALLOC_CONF instead"); \
    }                                                                         \
    instance.parseArgs(env##_name.value());                                   \
    return true;                                                              \
  }
  // Priority order: PYTORCH_ALLOC_CONF first, then the legacy device-specific
  // variables.
  static bool env_flag [[maybe_unused]] = []() {
    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_ALLOC_CONF, false)
    // Keep this for backwards compatibility
    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_CUDA_ALLOC_CONF, /*deprecated=*/true)
    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_HIP_ALLOC_CONF, /*deprecated=*/true)
    return false;
  }();
#undef C10_ALLOCATOR_CONFIG_PARSE_ENV
  return instance;
}
|
||||
|
||||
// Start with every rounding interval set to 0 (round-up disabled).
AcceleratorAllocatorConfig::AcceleratorAllocatorConfig()
    : roundup_power2_divisions_(kRoundUpPowerOfTwoIntervals, 0) {}
|
||||
|
||||
size_t AcceleratorAllocatorConfig::roundup_power2_divisions(size_t size) {
|
||||
size_t log_size = (63 - llvm::countLeadingZeros(size));
|
||||
|
||||
// Our intervals start at 1MB and end at 64GB
|
||||
const size_t interval_start =
|
||||
63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoStart);
|
||||
const size_t interval_end =
|
||||
63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoEnd);
|
||||
TORCH_CHECK(
|
||||
interval_end - interval_start == kRoundUpPowerOfTwoIntervals,
|
||||
"kRoundUpPowerOfTwoIntervals mismatch");
|
||||
|
||||
auto index = (log_size > interval_start) ? (log_size - interval_start) : 0ul;
|
||||
index = std::min(index, kRoundUpPowerOfTwoIntervals - 1);
|
||||
return instance().roundup_power2_divisions_[index];
|
||||
}
|
||||
|
||||
size_t AcceleratorAllocatorConfig::parseMaxSplitSize(
|
||||
const ConfigTokenizer& tokenizer,
|
||||
size_t i) {
|
||||
tokenizer.checkToken(++i, ":");
|
||||
constexpr size_t min_allowed_split_size_mb = kLargeBuffer / kMB;
|
||||
constexpr size_t max_allowed_split_size_mb =
|
||||
std::numeric_limits<size_t>::max() / kMB;
|
||||
|
||||
size_t val_env = tokenizer.toSizeT(++i);
|
||||
TORCH_CHECK(
|
||||
val_env >= min_allowed_split_size_mb,
|
||||
"CachingAllocator option max_split_size_mb too small, must be >= ",
|
||||
min_allowed_split_size_mb);
|
||||
val_env = std::min(val_env, max_allowed_split_size_mb);
|
||||
max_split_size_ = val_env * kMB;
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t AcceleratorAllocatorConfig::parseMaxNonSplitRoundingSize(
|
||||
const ConfigTokenizer& tokenizer,
|
||||
size_t i) {
|
||||
tokenizer.checkToken(++i, ":");
|
||||
constexpr size_t min_allowed_split_size_mb = kLargeBuffer / kMB;
|
||||
constexpr size_t max_allowed_split_size_mb =
|
||||
std::numeric_limits<size_t>::max() / kMB;
|
||||
|
||||
size_t val_env = tokenizer.toSizeT(++i);
|
||||
TORCH_CHECK(
|
||||
val_env >= min_allowed_split_size_mb,
|
||||
"CachingAllocator option max_non_split_rounding_mb too small, must be >= ",
|
||||
min_allowed_split_size_mb);
|
||||
val_env = std::min(val_env, max_allowed_split_size_mb);
|
||||
max_non_split_rounding_size_ = val_env * kMB;
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t AcceleratorAllocatorConfig::parseGarbageCollectionThreshold(
|
||||
const ConfigTokenizer& tokenizer,
|
||||
size_t i) {
|
||||
tokenizer.checkToken(++i, ":");
|
||||
double val_env = tokenizer.toDouble(++i);
|
||||
TORCH_CHECK(
|
||||
val_env > 0 && val_env < 1.0,
|
||||
"garbage_collect_threshold is invalid, set it in (0.0, 1.0)");
|
||||
garbage_collection_threshold_ = val_env;
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
// Parses `roundup_power2_divisions` in either of two forms:
//   - list form:   "roundup_power2_divisions:[64:8,256:4,>:1]" where each
//     entry is `<boundary_mb>:<divisions>` and a `>` key means "every
//     interval past the last explicit boundary";
//   - scalar form: "roundup_power2_divisions:4" (kept for backwards
//     compatibility), applying one division count to all intervals.
// Returns the index of the last token consumed.
size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
    const ConfigTokenizer& tokenizer,
    size_t i) {
  tokenizer.checkToken(++i, ":");
  bool first_value = true;

  if (tokenizer[++i] == "[") {
    // Index of the most recently assigned interval; the ">" entry fills
    // everything after it.
    size_t last_index = 0;
    // NOLINTNEXTLINE(bugprone-inc-dec-in-conditions)
    while (++i < tokenizer.size() && tokenizer[i] != "]") {
      size_t value_index = i;
      tokenizer.checkToken(++i, ":");
      size_t value = tokenizer.toSizeT(++i);
      TORCH_CHECK(
          value == 0 || llvm::isPowerOf2_64(value),
          "For roundups, the divisions has to be power of 2 or 0 to disable roundup ");

      if (tokenizer[value_index] == ">") {
        // Fill all intervals past the last explicit boundary with `value`.
        std::fill(
            std::next(
                roundup_power2_divisions_.begin(),
                static_cast<std::vector<size_t>::difference_type>(
                    last_index + 1)),
            roundup_power2_divisions_.end(),
            value);
      } else {
        size_t boundary = tokenizer.toSizeT(value_index);
        TORCH_CHECK(
            llvm::isPowerOf2_64(boundary),
            "For roundups, the intervals have to be power of 2 ");

        // Bucket index is log2 of the boundary. Boundaries are given in MB
        // and the intervals start at 1MB, so log2(boundary_mb) matches the
        // bucket computed from a byte size in roundup_power2_divisions(size).
        size_t index = 63 - llvm::countLeadingZeros(boundary);
        index =
            std::clamp(index, size_t{0}, roundup_power2_divisions_.size() - 1);

        if (first_value) {
          // Backfill every interval below the first explicit boundary too.
          std::fill(
              roundup_power2_divisions_.begin(),
              std::next(
                  roundup_power2_divisions_.begin(),
                  static_cast<std::vector<size_t>::difference_type>(index)),
              value);
          first_value = false;
        }
        roundup_power2_divisions_[index] = value;
        last_index = index;
      }

      // Entries are comma-separated unless the next token closes the list.
      // NOTE(review): if the config ends right after a value, tokenizer[i + 1]
      // trips the generic operator[] bounds assert before reaching the clearer
      // message below — confirm that is acceptable.
      if (tokenizer[i + 1] != "]") {
        tokenizer.checkToken(++i, ",");
      }
    }
    TORCH_INTERNAL_ASSERT(
        i < tokenizer.size(),
        "Expected closing bracket ']' in ConfigTokenizer but reached end of config");
  } else { // Keep this for backwards compatibility
    size_t value = tokenizer.toSizeT(i);
    TORCH_CHECK(
        llvm::isPowerOf2_64(value),
        "For roundups, the divisions has to be power of 2 ");
    std::fill(
        roundup_power2_divisions_.begin(),
        roundup_power2_divisions_.end(),
        value);
  }
  return i;
}
|
||||
|
||||
size_t AcceleratorAllocatorConfig::parseExpandableSegments(
|
||||
const ConfigTokenizer& tokenizer,
|
||||
size_t i) {
|
||||
tokenizer.checkToken(++i, ":");
|
||||
use_expandable_segments_ = tokenizer.toBool(++i);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t AcceleratorAllocatorConfig::parsePinnedUseBackgroundThreads(
|
||||
const ConfigTokenizer& tokenizer,
|
||||
size_t i) {
|
||||
tokenizer.checkToken(++i, ":");
|
||||
pinned_use_background_threads_ = tokenizer.toBool(++i);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
void AcceleratorAllocatorConfig::parseArgs(const std::string& env) {
|
||||
// The following option will be reset to its default value if not explicitly
|
||||
// set each time.
|
||||
max_split_size_ = std::numeric_limits<size_t>::max();
|
||||
roundup_power2_divisions_.assign(kRoundUpPowerOfTwoIntervals, 0);
|
||||
garbage_collection_threshold_ = 0;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(last_allocator_settings_mutex_);
|
||||
last_allocator_settings_ = env;
|
||||
}
|
||||
|
||||
ConfigTokenizer tokenizer(env);
|
||||
for (size_t i = 0; i < tokenizer.size(); i++) {
|
||||
const auto& key = tokenizer[i];
|
||||
if (key == "max_split_size_mb") {
|
||||
i = parseMaxSplitSize(tokenizer, i);
|
||||
} else if (key == "max_non_split_rounding_mb") {
|
||||
i = parseMaxNonSplitRoundingSize(tokenizer, i);
|
||||
} else if (key == "garbage_collection_threshold") {
|
||||
i = parseGarbageCollectionThreshold(tokenizer, i);
|
||||
} else if (key == "roundup_power2_divisions") {
|
||||
i = parseRoundUpPower2Divisions(tokenizer, i);
|
||||
} else if (key == "expandable_segments") {
|
||||
i = parseExpandableSegments(tokenizer, i);
|
||||
} else if (key == "pinned_use_background_threads") {
|
||||
i = parsePinnedUseBackgroundThreads(tokenizer, i);
|
||||
} else {
|
||||
i = tokenizer.skipKey(i);
|
||||
}
|
||||
|
||||
if (i + 1 < tokenizer.size()) {
|
||||
tokenizer.checkToken(++i, ",");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace c10::CachingAllocator
|
@ -1,337 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/core/DeviceType.h>
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/llvmMathExtras.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace c10::CachingAllocator {
|
||||
|
||||
// "large" allocations may be packed in 20 MiB blocks
// (20971520 == 20 * 1024 * 1024 bytes).
const size_t kLargeBuffer = 20971520;
|
||||
|
||||
// A utility class for tokenizing allocator configuration strings into discrete
|
||||
// parts. For example, the config string:
|
||||
// "key1:val1,key2:[val2,val3]"
|
||||
// is tokenized into:
|
||||
// "key1", ":", "val1", ",", "key2", ":", "[", "val2", ",", "val3", "]",
|
||||
//
|
||||
// Tokens include keys, values, and special characters (':', ',', '[', ']').
|
||||
// Whitespace is ignored.
|
||||
class ConfigTokenizer {
|
||||
public:
|
||||
explicit ConfigTokenizer(const std::string& env) {
|
||||
std::string buffer;
|
||||
for (char ch : env) {
|
||||
if (ch == ',' || ch == ':' || ch == '[' || ch == ']') {
|
||||
if (!buffer.empty()) {
|
||||
config_.emplace_back(std::move(buffer));
|
||||
buffer.clear();
|
||||
}
|
||||
config_.emplace_back(1, ch);
|
||||
} else if (!std::isspace(static_cast<unsigned char>(ch))) {
|
||||
buffer += ch;
|
||||
}
|
||||
}
|
||||
if (!buffer.empty()) {
|
||||
config_.emplace_back(std::move(buffer));
|
||||
}
|
||||
}
|
||||
|
||||
const std::string& operator[](size_t i) const {
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
i < config_.size(), "Index out of bounds in ConfigTokenizer");
|
||||
return config_[i];
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return config_.size();
|
||||
}
|
||||
|
||||
bool checkToken(size_t i, const std::string& token) const {
|
||||
checkIndex(i);
|
||||
return config_[i] == token;
|
||||
}
|
||||
|
||||
size_t toSizeT(size_t i) const {
|
||||
checkIndex(i);
|
||||
return std::stoull(config_[i]);
|
||||
}
|
||||
|
||||
double toDouble(size_t i) const {
|
||||
checkIndex(i);
|
||||
return std::stod(config_[i]);
|
||||
}
|
||||
|
||||
bool toBool(size_t i) const {
|
||||
checkIndex(i);
|
||||
const auto& token = config_[i];
|
||||
if (token == "True") {
|
||||
return true;
|
||||
} else if (token == "False") {
|
||||
return false;
|
||||
} else {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"Expected 'True' or 'False' at index ",
|
||||
i,
|
||||
" in ConfigTokenizer but got '",
|
||||
token,
|
||||
"'");
|
||||
}
|
||||
}
|
||||
|
||||
// Skips the current token group and returns the index of the value token.
|
||||
// Assumes the current index `i` points to a key name in a key-value pair.
|
||||
size_t skipKey(size_t i) const {
|
||||
// Expect a colon after the key
|
||||
checkToken(++i, ":");
|
||||
|
||||
++i; // Move to the value
|
||||
checkIndex(i);
|
||||
if (config_[i] != "[") {
|
||||
// Value is a single token (not a list) -> return its index
|
||||
return i;
|
||||
}
|
||||
|
||||
// Skip tokens inside the list until matching ']'
|
||||
// NOLINTNEXTLINE(bugprone-inc-dec-in-conditions)
|
||||
while (++i < config_.size() && config_[i] != "]") {
|
||||
}
|
||||
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
i < config_.size(),
|
||||
"Expected closing bracket ']' in ConfigTokenizer but reached end of config");
|
||||
|
||||
return i; // Return the index of the closing ']'
|
||||
}
|
||||
|
||||
private:
|
||||
void checkIndex(size_t i) const {
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
i < config_.size(), "Index out of bounds in ConfigTokenizer");
|
||||
}
|
||||
|
||||
std::vector<std::string> config_;
|
||||
};
|
||||
|
||||
/**
|
||||
* Note [AcceleratorAllocatorConfig design]
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
* This class configures memory allocation for both device and host memory. A
|
||||
* single `AcceleratorAllocatorConfig` instance is shared across all accelerator
|
||||
* backends, such as CUDA and XPU, under the assumption that relevant
|
||||
* environment variables apply uniformly to all accelerators. Device-specific
|
||||
* configuration extensions are supported via hooks (see
|
||||
* `registerDeviceConfigParserHook`).
|
||||
*
|
||||
* Recommended design:
|
||||
* - Place common configurations in `AcceleratorAllocatorConfig`.
|
||||
* - Extend backend-specific configurations in corresponding device-specific
|
||||
* classes, such as `CUDAAllocatorConfig`, etc.
|
||||
*
|
||||
* Scope:
|
||||
* - Configuration options must be environment-variable driven.
|
||||
*
|
||||
* Naming Convention:
|
||||
* - Public API names in `AcceleratorAllocatorConfig` should be device-generic.
|
||||
* - Members prefixed with `pinned_` are specific to the host/pinned allocator.
|
||||
* - Environment variable names should be generic across backends.
|
||||
* - Comma-separated key-value pairs in the format: `key:value`. Use square
|
||||
* brackets `[]` for list values Example: `key1:123, key2:[val1,val2]`
|
||||
*
|
||||
* Environment Variables:
|
||||
* - The primary environment variable for configuration is `PYTORCH_ALLOC_CONF`.
|
||||
* - For backward compatibility, `PYTORCH_CUDA_ALLOC_CONF` is also supported
|
||||
* with lower priority.
|
||||
*/
|
||||
|
||||
class C10_API AcceleratorAllocatorConfig {
 public:
  // Singleton accessor; the first call also triggers one-time parsing of the
  // PYTORCH_ALLOC_CONF (or legacy CUDA/HIP) environment variable.
  static AcceleratorAllocatorConfig& instance();

  C10_DISABLE_COPY_AND_ASSIGN(AcceleratorAllocatorConfig);
  AcceleratorAllocatorConfig(AcceleratorAllocatorConfig&&) = delete;
  AcceleratorAllocatorConfig& operator=(AcceleratorAllocatorConfig&&) = delete;
  ~AcceleratorAllocatorConfig() = default;

  /* Device allocator settings */

  // Returns the maximum block size that is allowed to be split. Note that
  // although the `max_split_size_mb` knob is expressed in MB, the stored and
  // returned value is in bytes. The default is unlimited (all blocks can be
  // split).
  static size_t max_split_size() {
    return instance().max_split_size_;
  }

  // Returns the maximum additional size (in bytes, although the
  // `max_non_split_rounding_mb` knob is expressed in MB) a block may be
  // rounded up by without requiring splitting when searching for a free
  // block. The default is 20 MiB (kLargeBuffer).
  static size_t max_non_split_rounding_size() {
    return instance().max_non_split_rounding_size_;
  }

  // Returns the number of divisions used when rounding up an allocation of
  // `size` bytes to the nearest power-of-2 boundary.
  static size_t roundup_power2_divisions(size_t size);

  // Returns the vector of division factors used for rounding up allocation
  // sizes. These divisions apply to size intervals between 1MB and 64GB.
  static std::vector<size_t> roundup_power2_divisions() {
    return instance().roundup_power2_divisions_;
  }

  // Returns the threshold that triggers garbage collection when the ratio of
  // used memory to maximum allowed memory exceeds this value. The default is 0,
  // meaning no garbage collection is triggered. The value should be in the
  // range (0.0, 1.0).
  static double garbage_collection_threshold() {
    return instance().garbage_collection_threshold_;
  }

  // Returns whether the expandable segment feature is enabled. This allows the
  // allocator to start with one segment that grows as needed, rather than
  // creating a new segment for each allocation. Default is false (expandable
  // segments disabled).
  static bool use_expandable_segments() {
    return instance().use_expandable_segments_;
  }

  /* Host allocator settings */

  // Returns whether the pinned host allocator uses background threads for
  // processing events. This is useful for improving performance in scenarios
  // where many small allocations are made. Default is false (background threads
  // disabled).
  static bool pinned_use_background_threads() {
    return instance().pinned_use_background_threads_;
  }

  /* Settings for both device and host allocator */

  // Returns the current allocator settings as a string. This string is useful
  // to expand device-specific allocator configurations
  static std::string last_allocator_settings() {
    std::lock_guard<std::mutex> lock(instance().last_allocator_settings_mutex_);
    return instance().last_allocator_settings_;
  }

  // Parses the configuration string `env` to update the allocator settings.
  // Options not mentioned in the string are reset to their defaults.
  // The configuration string should be a comma-separated list of key-value
  // pairs, where each key is a configuration option and the value is the
  // corresponding setting. For example:
  // "max_split_size_mb:100,max_non_split_rounding_mb:20,garbage_collection_threshold:0.5,roundup_power2_divisions:[64:8,256:4,1024:4,>:1],expandable_segments:true,pinned_use_background_threads:true"
  void parseArgs(const std::string& env);

  // Registers a device-specific configuration parser hook. This allows
  // backends to parse additional device-specific configuration options from the
  // environment variable. The hook should be a function that takes a string
  // (the environment variable value) and parses it to set device-specific
  // configuration options.
  // The hook will be called when the environment variable is parsed.
  // If a hook is already registered, it will be replaced with the new one.
  void registerDeviceConfigParserHook(
      std::function<void(const std::string&)> hook) {
    device_config_parser_hook_ = std::move(hook);
  }

  // Calls the registered device-specific configuration parser hook with the
  // provided environment string. This allows backends to parse additional
  // device-specific configuration options from the environment variable.
  // If no hook is registered, this function does nothing.
  void callDeviceConfigParserHook(const std::string& env) const {
    if (device_config_parser_hook_) {
      device_config_parser_hook_(env);
    }
  }

 private:
  AcceleratorAllocatorConfig();

  /* Internal functions for device allocator */

  // Parse `max_split_size_mb` from environment variable.
  size_t parseMaxSplitSize(const ConfigTokenizer& tokenizer, size_t i);
  // Parse `max_non_split_rounding_mb` from environment variable.
  size_t parseMaxNonSplitRoundingSize(
      const ConfigTokenizer& tokenizer,
      size_t i);
  // Parse `garbage_collection_threshold` from environment variable.
  size_t parseGarbageCollectionThreshold(
      const ConfigTokenizer& tokenizer,
      size_t i);
  // Parse `roundup_power2_divisions` from environment variable.
  size_t parseRoundUpPower2Divisions(
      const ConfigTokenizer& tokenizer,
      size_t i);
  // Parse `expandable_segments` from environment variable.
  size_t parseExpandableSegments(const ConfigTokenizer& tokenizer, size_t i);

  /* Internal functions for host allocator */

  // Parse `pinned_use_background_threads` from environment variable.
  size_t parsePinnedUseBackgroundThreads(
      const ConfigTokenizer& tokenizer,
      size_t i);

  /* The following members are specifically used for the device allocator. */

  // The maximum block size (in bytes) that is allowed to be split.
  std::atomic<size_t> max_split_size_{std::numeric_limits<size_t>::max()};
  // The maximum allowable extra size (in bytes) of a memory block without
  // requiring splitting when searching for a free block.
  std::atomic<size_t> max_non_split_rounding_size_{kLargeBuffer};
  // Used to store how memory allocations of different sizes should be rounded
  // up to the nearest power of 2 divisions.
  std::vector<size_t> roundup_power2_divisions_;
  // The threshold that triggers garbage collection when the ratio of used
  // memory to maximum allowed memory exceeds this value.
  std::atomic<double> garbage_collection_threshold_{0};
  // A flag to enable expandable segments feature.
  std::atomic<bool> use_expandable_segments_{false};

  /* The following members are specifically used for the host allocator. */

  // A flag to enable background thread for processing events.
  std::atomic<bool> pinned_use_background_threads_{false};

  /* The following members are used for both device and host allocator. */

  // Record the last allocator config environment setting.
  std::mutex last_allocator_settings_mutex_;
  std::string last_allocator_settings_;

  // Optional hook for parsing additional device-specific allocator settings.
  // This allows backends (e.g., CUDA, XPU) to register a custom parser for
  // their own environment configuration extensions.
  std::function<void(const std::string&)> device_config_parser_hook_{nullptr};
};
|
||||
|
||||
// Applies the generic allocator settings first, then gives the registered
// device-specific hook (if any) a chance to parse its own options from the
// same string.
C10_API inline void setAllocatorSettings(const std::string& env) {
  auto& config = AcceleratorAllocatorConfig::instance();
  config.parseArgs(env);
  config.callDeviceConfigParserHook(env);
}
|
||||
|
||||
// Returns a snapshot of the most recently applied allocator settings string.
C10_API inline std::string getAllocatorSettings() {
  auto& config = AcceleratorAllocatorConfig::instance();
  return config.last_allocator_settings();
}
|
||||
|
||||
// RAII-style registrar: constructing one registers `hook` as the
// device-specific config parser. Intended to be instantiated at static
// initialization time (see REGISTER_ALLOCATOR_CONFIG_PARSE_HOOK).
struct DeviceConfigParserHookRegistry {
  explicit DeviceConfigParserHookRegistry(
      std::function<void(const std::string&)> hook) {
    AcceleratorAllocatorConfig::instance().registerDeviceConfigParserHook(
        std::move(hook));
  }
};
|
||||
|
||||
// Registers `hook` as the device-specific allocator-config parser at static
// initialization time by defining a file-local registry object.
// NOTE(review): this spells at::CachingAllocator while the surrounding
// namespace is c10::CachingAllocator — presumably an `at` namespace alias
// exists elsewhere; verify the name resolves at the macro's use sites.
#define REGISTER_ALLOCATOR_CONFIG_PARSE_HOOK(hook)             \
  namespace {                                                  \
  static at::CachingAllocator::DeviceConfigParserHookRegistry  \
      g_device_config_parse_hook_registry_instance(hook);      \
  }
|
||||
|
||||
} // namespace c10::CachingAllocator
|
@ -1,123 +0,0 @@
|
||||
#include <c10/core/AllocatorConfig.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace c10::CachingAllocator;
// One mebibyte, matching the MB units used by the allocator config knobs.
constexpr size_t kMB = 1024 * 1024ul;
|
||||
|
||||
struct ExtendedAllocatorConfig {
|
||||
static ExtendedAllocatorConfig& instance() {
|
||||
static ExtendedAllocatorConfig instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
// Returns the device-specific option value in bytes.
|
||||
static size_t device_specific_option() {
|
||||
return instance().device_specific_option_;
|
||||
}
|
||||
|
||||
void parseArgs(const std::string& env) {
|
||||
// Parse device-specific options from the environment variable
|
||||
ConfigTokenizer tokenizer(env);
|
||||
for (size_t i = 0; i < tokenizer.size(); i++) {
|
||||
const auto& key = tokenizer[i];
|
||||
if (key == "device_specific_option_mb") {
|
||||
tokenizer.checkToken(++i, ":");
|
||||
device_specific_option_ = tokenizer.toSizeT(++i) * kMB;
|
||||
} else {
|
||||
i = tokenizer.skipKey(i);
|
||||
}
|
||||
|
||||
if (i + 1 < tokenizer.size()) {
|
||||
tokenizer.checkToken(++i, ",");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Device-specific option, e.g., memory limit for a specific device.
|
||||
std::atomic<size_t> device_specific_option_{0};
|
||||
};
|
||||
|
||||
// Route the shared allocator-config string through the device-specific parser
// whenever setAllocatorSettings is called.
REGISTER_ALLOCATOR_CONFIG_PARSE_HOOK([](const std::string& env) {
  ExtendedAllocatorConfig::instance().parseArgs(env);
})
|
||||
|
||||
TEST(AllocatorConfigTest, allocator_config_test) {
  // Exercise every generic knob plus the device-specific extension in one
  // config string.
  std::string env =
      "max_split_size_mb:40,"
      "max_non_split_rounding_mb:30,"
      "garbage_collection_threshold:0.5,"
      "roundup_power2_divisions:[64:8,128:2,256:4,512:2,1024:4,>:1],"
      "expandable_segments:True,"
      "pinned_use_background_threads:True,"
      "device_specific_option_mb:64";
  c10::CachingAllocator::setAllocatorSettings(env);
  EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env);
  // MB-denominated knobs are stored in bytes.
  EXPECT_EQ(AcceleratorAllocatorConfig::max_split_size(), 40 * kMB);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::max_non_split_rounding_size(), 30 * kMB);
  EXPECT_EQ(AcceleratorAllocatorConfig::garbage_collection_threshold(), 0.5);
  // Sizes below the first boundary (64MB) inherit its division count; sizes
  // past the last explicit boundary fall into the ">" bucket.
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(32 * kMB), 8);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(64 * kMB), 8);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(128 * kMB), 2);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 4);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 2);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 4);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 1);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(4096 * kMB), 1);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(8192 * kMB), 1);
  EXPECT_EQ(AcceleratorAllocatorConfig::use_expandable_segments(), true);
  EXPECT_EQ(AcceleratorAllocatorConfig::pinned_use_background_threads(), true);
  // The device-specific hook parsed its own key from the same string.
  EXPECT_EQ(ExtendedAllocatorConfig::device_specific_option(), 64 * kMB);

  // Re-parsing resets options that are not mentioned back to their defaults.
  env =
      "max_split_size_mb:20,"
      "max_non_split_rounding_mb:40,"
      "garbage_collection_threshold:0.8";
  c10::CachingAllocator::setAllocatorSettings(env);
  EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env);
  EXPECT_EQ(AcceleratorAllocatorConfig::max_split_size(), 20 * kMB);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::max_non_split_rounding_size(), 40 * kMB);
  EXPECT_EQ(AcceleratorAllocatorConfig::garbage_collection_threshold(), 0.8);

  // roundup_power2_divisions knob array syntax
  env = "roundup_power2_divisions:[128:8,256:16,512:1,2048:8,>:2]";
  c10::CachingAllocator::setAllocatorSettings(env);
  EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(64 * kMB), 8);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(128 * kMB), 8);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 16);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 1);
  // 1024MB sits between explicit boundaries (512 and 2048) and was reset to 0
  // by the re-parse above.
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 0);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 8);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(4096 * kMB), 2);

  // roundup_power2_divisions single value syntax for backward compatibility
  env = "roundup_power2_divisions:4";
  c10::CachingAllocator::setAllocatorSettings(env);
  EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(64 * kMB), 4);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 4);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 4);

  // A trailing comma after the last option is tolerated.
  env = "expandable_segments:False,";
  c10::CachingAllocator::setAllocatorSettings(env);
  EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env);
  EXPECT_EQ(AcceleratorAllocatorConfig::use_expandable_segments(), false);

  env = "pinned_use_background_threads:False";
  c10::CachingAllocator::setAllocatorSettings(env);
  EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env);
  EXPECT_EQ(AcceleratorAllocatorConfig::pinned_use_background_threads(), false);
}
|
Reference in New Issue
Block a user