Refine Allocator Config error messages to be friendlier (#165288)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165288
Approved by: https://github.com/albanD
Author: Yu, Guangye
Date: 2025-10-17 17:16:41 +00:00
Committed by: PyTorch MergeBot
Parent: 5d62b63a76
Commit: 4888ed440e
4 changed files with 23 additions and 22 deletions
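
This commit swaps `TORCH_CHECK` for `TORCH_CHECK_VALUE` throughout the allocator-config parsers, so malformed settings surface in Python as `ValueError` instead of a generic `RuntimeError`. A minimal sketch of the user-visible effect, assuming a CUDA build of PyTorch (the setting string comes from the updated test at the bottom of this diff):

```python
# Sketch of the user-visible change, assuming a CUDA build of PyTorch.
# An unrecognized allocator-config key used to raise RuntimeError; after
# this commit it raises ValueError instead.
import torch

try:
    torch.cuda.memory._set_allocator_settings("foo:1,bar:2")
except ValueError as e:
    print("rejected:", e)
```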


@@ -46,7 +46,7 @@ size_t AcceleratorAllocatorConfig::roundup_power2_divisions(size_t size) {
       63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoStart);
   const size_t interval_end =
       63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoEnd);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       interval_end - interval_start == kRoundUpPowerOfTwoIntervals,
       "kRoundUpPowerOfTwoIntervals mismatch");
@@ -65,7 +65,7 @@ size_t AcceleratorAllocatorConfig::parseMaxSplitSize(
       std::numeric_limits<size_t>::max() / kMB;
   size_t val_env = tokenizer.toSizeT(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       val_env >= min_allowed_split_size_mb,
       "CachingAllocator option max_split_size_mb too small, must be >= ",
       min_allowed_split_size_mb);
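
A sketch of what this check now does at the Python level; the `max_split_size_mb:2` string is taken from the updated test below, and the exact minimum is build-defined:

```python
# Sketch, assuming a CUDA build: max_split_size_mb below the allocator's
# minimum is now rejected with ValueError (the updated test uses 2 MB).
import torch

try:
    torch.cuda.memory._set_allocator_settings("max_split_size_mb:2")
except ValueError as e:
    print("too small:", e)
```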
@@ -84,7 +84,7 @@ size_t AcceleratorAllocatorConfig::parseMaxNonSplitRoundingSize(
       std::numeric_limits<size_t>::max() / kMB;
   size_t val_env = tokenizer.toSizeT(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       val_env >= min_allowed_split_size_mb,
       "CachingAllocator option max_non_split_rounding_mb too small, must be >= ",
       min_allowed_split_size_mb);
@@ -99,7 +99,7 @@ size_t AcceleratorAllocatorConfig::parseGarbageCollectionThreshold(
     size_t i) {
   tokenizer.checkToken(++i, ":");
   double val_env = tokenizer.toDouble(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       val_env > 0 && val_env < 1.0,
       "garbage_collect_threshold is invalid, set it in (0.0, 1.0)");
   garbage_collection_threshold_ = val_env;
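
The threshold must lie strictly inside (0.0, 1.0), so both boundary values fail; a sketch assuming a CUDA build:

```python
# garbage_collection_threshold must satisfy 0.0 < value < 1.0; the
# boundaries themselves are rejected with ValueError (sketch, CUDA build).
import torch

torch.cuda.memory._set_allocator_settings("garbage_collection_threshold:0.8")  # ok
for bad in ("garbage_collection_threshold:0.0", "garbage_collection_threshold:1.0"):
    try:
        torch.cuda.memory._set_allocator_settings(bad)
    except ValueError:
        print(f"{bad} rejected, as expected")
```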
@@ -120,7 +120,7 @@ size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
       size_t value_index = i;
       tokenizer.checkToken(++i, ":");
       size_t value = tokenizer.toSizeT(++i);
-      TORCH_CHECK(
+      TORCH_CHECK_VALUE(
           value == 0 || llvm::isPowerOf2_64(value),
           "For roundups, the divisions has to be power of 2 or 0 to disable roundup ");
@@ -133,7 +133,7 @@ size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
           value);
     } else {
       size_t boundary = tokenizer.toSizeT(value_index);
-      TORCH_CHECK(
+      TORCH_CHECK_VALUE(
           llvm::isPowerOf2_64(boundary),
           "For roundups, the intervals have to be power of 2 ");
@@ -163,7 +163,7 @@ size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
         "Expected closing bracket ']' in ConfigTokenizer but reached end of config");
   } else { // Keep this for backwards compatibility
     size_t value = tokenizer.toSizeT(i);
-    TORCH_CHECK(
+    TORCH_CHECK_VALUE(
         llvm::isPowerOf2_64(value),
         "For roundups, the divisions has to be power of 2 ");
     std::fill(
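
As these hunks show, `roundup_power2_divisions` accepts either a single division count (the backwards-compatible branch) or a bracketed per-interval list, and every division and interval boundary must be a power of two (or 0 to disable). A sketch, assuming a CUDA build; the bracketed string follows the syntax these parsers accept:

```python
# Sketch, assuming a CUDA build. Both accepted forms of
# roundup_power2_divisions; every value must be a power of two (or 0).
import torch

# Backwards-compatible form: one division count for all sizes.
torch.cuda.memory._set_allocator_settings("roundup_power2_divisions:4")
# Per-interval form: power-of-two size boundaries mapped to divisions.
torch.cuda.memory._set_allocator_settings(
    "roundup_power2_divisions:[256:1,512:2,1024:4,>:8]"
)
try:
    torch.cuda.memory._set_allocator_settings("roundup_power2_divisions:3")
except ValueError:
    print("3 is not a power of two")
```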
@@ -223,7 +223,7 @@ void AcceleratorAllocatorConfig::parseArgs(const std::string& env) {
     // If a device-specific configuration parser hook is registered, it will
     // check if the key is unrecognized.
     if (device_config_parser_hook_) {
-      TORCH_CHECK(
+      TORCH_CHECK_VALUE(
           getKeys().find(key) != getKeys().end(),
           "Unrecognized key '",
           key,


@@ -76,7 +76,7 @@ class ConfigTokenizer {
     } else if (token == "False") {
       return false;
     } else {
-      TORCH_CHECK(
+      TORCH_CHECK_VALUE(
           false,
           "Expected 'True' or 'False' at index ",
           i,
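
`ConfigTokenizer::toBool` accepts only the literal tokens `True` and `False`, so boolean options spelled any other way now fail with `ValueError`; the `release_lock_on_cudamalloc:none` case below is exercised by the updated test. A sketch, assuming a CUDA build:

```python
# Sketch, assuming a CUDA build: boolean options must be spelled exactly
# "True" or "False"; anything else is rejected by ConfigTokenizer::toBool.
import torch

torch.cuda.memory._set_allocator_settings("release_lock_on_cudamalloc:True")  # ok
try:
    torch.cuda.memory._set_allocator_settings("release_lock_on_cudamalloc:none")
except ValueError:
    print("booleans must be True or False")
```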
@@ -253,7 +253,7 @@ class C10_API AcceleratorAllocatorConfig {
     device_config_parser_hook_ = std::move(hook);
     auto& mutable_keys = getMutableKeys();
     for (auto& key : keys) {
-      TORCH_CHECK(
+      TORCH_CHECK_VALUE(
           mutable_keys.insert(key).second,
           "Duplicated key '",
           key,


@@ -20,7 +20,7 @@ size_t CUDAAllocatorConfig::parseAllocatorConfig(
   tokenizer.checkToken(++i, ":");
   i++; // Move to the value after the colon
 #ifdef USE_ROCM
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       ((tokenizer[i] == "native") || (tokenizer[i] == PYTORCH_TOKEN1) ||
        (tokenizer[i] == PYTORCH_TOKEN2)),
       "Unknown allocator backend, "
@@ -36,7 +36,7 @@ size_t CUDAAllocatorConfig::parseAllocatorConfig(
       " != ",
       get()->name());
 #else // USE_ROCM
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       ((tokenizer[i] == "native") || (tokenizer[i] == PYTORCH_TOKEN1)),
       "Unknown allocator backend, "
       "options are native and " PYTORCH_TOKEN1);
@@ -109,7 +109,7 @@ void CUDAAllocatorConfig::parseArgs(const std::string& env) {
   } else {
     const auto& keys =
         c10::CachingAllocator::AcceleratorAllocatorConfig::getKeys();
-    TORCH_CHECK(
+    TORCH_CHECK_VALUE(
         keys.find(key) != keys.end(),
         "Unrecognized key '",
         key,
@@ -151,12 +151,12 @@ size_t CUDAAllocatorConfig::parsePinnedNumRegisterThreads(
     size_t i) {
   tokenizer.checkToken(++i, ":");
   size_t val2 = tokenizer.toSizeT(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       llvm::isPowerOf2_64(val2),
       "Number of register threads has to be power of 2, got ",
       val2);
   auto maxThreads = CUDAAllocatorConfig::pinned_max_register_threads();
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       val2 <= maxThreads,
       "Number of register threads should be less than or equal to ",
       maxThreads,
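
Register-thread counts must be a power of two and capped by `pinned_max_register_threads()`; the `1024` case below comes from the updated test, while the cap itself is an implementation constant (assumed to be 128 here):

```python
# Sketch, assuming a CUDA build: pinned_num_register_threads must be a
# power of two no larger than pinned_max_register_threads() (assumed 128).
import torch

torch.cuda.memory._set_allocator_settings("pinned_num_register_threads:8")  # ok
try:
    torch.cuda.memory._set_allocator_settings("pinned_num_register_threads:1024")
except ValueError:
    print("exceeds the register-thread cap")
```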
@@ -171,7 +171,8 @@ size_t CUDAAllocatorConfig::parsePinnedReserveSegmentSize(
     size_t i) {
   tokenizer.checkToken(++i, ":");
   size_t val2 = tokenizer.toSizeT(++i);
-  TORCH_CHECK(val2 > 0, "Pinned reserve segment size has to be greater than 0");
+  TORCH_CHECK_VALUE(
+      val2 > 0, "Pinned reserve segment size has to be greater than 0");
   m_pinned_reserve_segment_size_mb = val2;
   return i;
 }
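
The reserve-segment size simply has to be positive. A sketch; the key name is inferred from the `m_pinned_reserve_segment_size_mb` member it sets and is an assumption here:

```python
# Sketch: pinned_reserve_segment_size_mb must be > 0 (key name inferred
# from m_pinned_reserve_segment_size_mb; assumes a CUDA build).
import torch

try:
    torch.cuda.memory._set_allocator_settings("pinned_reserve_segment_size_mb:0")
except ValueError:
    print("must be greater than 0")
```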


@@ -4582,21 +4582,21 @@ class TestCudaMallocAsync(TestCase):
         reg_mem = torch.cuda.memory_stats()[key_allocated]
         self.assertEqual(reg_mem - start_mem, nbytes)

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings("foo:1,bar:2")

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings(
                 "garbage_collection_threshold:1.2"
             )

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings("max_split_size_mb:2")

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings("release_lock_on_cudamalloc:none")

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings(
                 "pinned_use_cuda_host_register:none"
             )
@@ -4606,7 +4606,7 @@ class TestCudaMallocAsync(TestCase):
                 "pinned_num_register_threads:none"
             )

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings(
                 "pinned_num_register_threads:1024"
             )
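
Because `TORCH_CHECK_VALUE` raises `c10::ValueError`, which the Python bindings translate to `ValueError` rather than `RuntimeError`, downstream code that guarded these calls with `except RuntimeError` no longer catches them. A migration sketch that works on either side of this change:

```python
# Migration sketch: ValueError is not a subclass of RuntimeError, so catch
# both to support PyTorch versions before and after this commit.
import torch

def try_set_allocator_settings(settings: str) -> bool:
    try:
        torch.cuda.memory._set_allocator_settings(settings)
        return True
    except (ValueError, RuntimeError):
        return False

print(try_set_allocator_settings("max_split_size_mb:128"))  # True on CUDA builds
```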