[Environment Variable][7/N] Use thread-safe getenv functions (#140211)

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140211
Approved by: https://github.com/ezyang, https://github.com/eqy
Author: cyyever
Date: 2025-04-24 01:06:29 +00:00
Committed by: PyTorch MergeBot
Parent: 8172397025
Commit: f2cfeb23e5

17 changed files with 61 additions and 47 deletions
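
For orientation before the per-file diffs: every hunk below applies the same substitution. Raw getenv returns a pointer into the process environment and can race with concurrent setenv/putenv calls, while c10::utils::get_env takes a shared lock and returns a std::optional<std::string> copy. A minimal sketch of the API as it is used in these diffs (the variable name MY_FLAG is hypothetical; the authoritative declarations are in c10/util/env.h):

#include <c10/util/env.h>
#include <optional>
#include <string>

void example() {
  // Thread-safe read: std::nullopt when "MY_FLAG" is unset,
  // otherwise a copy of the value.
  const std::optional<std::string> v = c10::utils::get_env("MY_FLAG");
  const bool enabled = v.has_value() && !v->empty();

  // Presence-only check, as used for PYTORCH_MPS_PREFER_METAL below.
  const bool present = c10::utils::has_env("MY_FLAG");

  // Thread-safe write, serialized against get_env by the same mutex.
  c10::utils::set_env("MY_FLAG", "1", /*overwrite=*/true);
  (void)enabled;
  (void)present;
}

Because get_env returns an owning std::string rather than a pointer into mutable global state, call sites below switch from strlen/strcmp/strtol on raw pointers to empty()/operator==/->c_str() on the optional.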

View File

@@ -1,4 +1,5 @@
 #include <ATen/core/Vitals.h>
+#include <c10/util/env.h>
 #include <cstdlib>
 #include <iostream>
@@ -41,9 +42,9 @@ bool torchVitalEnabled() {
   // If this is a performance hit, make `enabled` variable static
   // and return `const bool&` instead
   bool enabled = []() {
-    auto e = getenv("TORCH_VITAL");
-    if (e != nullptr) {
-      return e[0] != '\0';
+    auto const e = c10::utils::get_env("TORCH_VITAL");
+    if (e.has_value()) {
+      return !e.value().empty();
     }
     return false;
   }();

View File

@@ -114,14 +114,14 @@ void clearCublasWorkspaces() {
 }
 size_t parseChosenWorkspaceSize() {
-  const char * val = getenv("CUBLAS_WORKSPACE_CONFIG");
+  auto val = c10::utils::get_env("CUBLAS_WORKSPACE_CONFIG");
 #ifdef USE_ROCM
   if (!val) {
-    val = getenv("HIPBLAS_WORKSPACE_CONFIG");
+    val = c10::utils::get_env("HIPBLAS_WORKSPACE_CONFIG");
   }
   if (!val) {
     // for extra convenience
-    val = getenv("ROCBLAS_WORKSPACE_CONFIG");
+    val = c10::utils::get_env("ROCBLAS_WORKSPACE_CONFIG");
   }
   /* 32MiB default, 128MiB for gfx94x/gfx95x */
   const bool gfx94_95 = at::detail::getCUDAHooks().isGPUArch({"gfx94", "gfx95"});
@@ -135,7 +135,7 @@ size_t parseChosenWorkspaceSize() {
   if (val) {
     size_t total_size = 0;
-    const std::string config(val);
+    const std::string& config(val.value());
     std::regex exp(":([0-9]+):([0-9]+)");
     std::sregex_iterator next(config.begin(), config.end(), exp);
     std::sregex_iterator end;
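
A subtlety in the second hunk: config now binds a const std::string& directly to the string stored inside the optional, where the old code constructed a fresh std::string from the C pointer. The binding is safe because val outlives config in the enclosing scope. A standalone illustration under that assumption (the value shown is hypothetical):

#include <optional>
#include <string>

int main() {
  std::optional<std::string> val{":4096:8"};
  // Aliases the string inside `val`; no allocation or copy. Valid only
  // while `val` stays alive and engaged.
  const std::string& config(val.value());
  return config.empty() ? 1 : 0;
}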

View File

@@ -382,7 +382,7 @@ static hipblasOperation_t MapLayoutToHipBlasLt(BlasOp layout) {
 }
 static size_t GetHipblasltWorkspaceSize() {
-  static const char * env = getenv("HIPBLASLT_WORKSPACE_SIZE");
+  static const auto env = c10::utils::get_env("HIPBLASLT_WORKSPACE_SIZE");
   // 256MB is max workspace size allowed for hipblaslt
   // hipblaslt-bench uses 32MB
   // recommendation from hipblaslt author was 76MB
@@ -391,7 +391,7 @@ static size_t GetHipblasltWorkspaceSize() {
   size_t workspace_size = 76*1024;
   if (env) {
     try {
-      workspace_size = std::stoi(env);
+      workspace_size = std::stoi(env.value());
     } catch(std::invalid_argument const& e) {
       TORCH_WARN("invalid HIPBLASLT_WORKSPACE_SIZE,",
           " using default workspace size of ", workspace_size, " KiB.");

View File

@@ -524,8 +524,8 @@ void TuningContext::EnableNumericsCheck(bool value) {
 }
 bool TuningContext::IsNumericsCheckEnabled() const {
-  const char *env = getenv("PYTORCH_TUNABLEOP_NUMERICAL_CHECK");
-  if (env != nullptr && strcmp(env, "1") == 0) {
+  const auto env = c10::utils::get_env("PYTORCH_TUNABLEOP_NUMERICAL_CHECK");
+  if (env == "1") {
     return true;
   }
   return numerics_check_enable_;
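
The null check and strcmp collapse into env == "1" because std::optional defines mixed comparison against a plain value: a disengaged optional never compares equal, and an engaged one compares its contained std::string with the literal. A standalone sketch of that behavior (values are hypothetical):

#include <cassert>
#include <optional>
#include <string>

int main() {
  std::optional<std::string> unset;      // variable not set
  std::optional<std::string> set{"1"};   // variable set to "1"
  assert(!(unset == "1"));  // disengaged: never equal, no dereference
  assert(set == "1");       // engaged: compares the contained string
  return 0;
}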

View File

@@ -5,6 +5,7 @@
 #include <ATen/mps/MPSAllocator.h>
 #include <c10/core/Allocator.h>
 #include <c10/core/Storage.h>
+#include <c10/util/env.h>
 #include <iostream>
@@ -21,19 +22,19 @@ void MPSHeapAllocatorImpl::init_allocator() {
   init_buffer_pools();
   // debug verbosity flags (see DebugVerbosity enum)
-  static const char* verbosity_str = getenv("PYTORCH_DEBUG_MPS_ALLOCATOR");
-  m_debug_verbosity = verbosity_str ? strtol(verbosity_str, nullptr, 0) : DebugVerbosity::SILENT;
+  static const auto verbosity_str = c10::utils::get_env("PYTORCH_DEBUG_MPS_ALLOCATOR");
+  m_debug_verbosity = verbosity_str ? strtol(verbosity_str->c_str(), nullptr, 0) : DebugVerbosity::SILENT;
-  static const char* high_watermark_ratio_str = getenv("PYTORCH_MPS_HIGH_WATERMARK_RATIO");
+  static const auto high_watermark_ratio_str = c10::utils::get_env("PYTORCH_MPS_HIGH_WATERMARK_RATIO");
   const double high_watermark_ratio =
-      high_watermark_ratio_str ? strtod(high_watermark_ratio_str, nullptr) : default_high_watermark_ratio;
+      high_watermark_ratio_str ? strtod(high_watermark_ratio_str->c_str(), nullptr) : default_high_watermark_ratio;
   setHighWatermarkRatio(high_watermark_ratio);
   const double default_low_watermark_ratio =
       m_device.hasUnifiedMemory ? default_low_watermark_ratio_unified : default_low_watermark_ratio_discrete;
-  static const char* low_watermark_ratio_str = getenv("PYTORCH_MPS_LOW_WATERMARK_RATIO");
+  static const auto low_watermark_ratio_str = c10::utils::get_env("PYTORCH_MPS_LOW_WATERMARK_RATIO");
   const double low_watermark_ratio =
-      low_watermark_ratio_str ? strtod(low_watermark_ratio_str, nullptr) : default_low_watermark_ratio;
+      low_watermark_ratio_str ? strtod(low_watermark_ratio_str->c_str(), nullptr) : default_low_watermark_ratio;
   setLowWatermarkRatio(low_watermark_ratio);
 }

View File

@@ -2,6 +2,7 @@
 #include <ATen/mps/MPSProfiler.h>
 #include <ATen/native/CPUFallback.h>
+#include <c10/util/env.h>
 #include <caffe2/core/common.h>
 namespace at {
@@ -76,8 +77,8 @@ static Tensor slow_conv2d_forward_mps(const Tensor& self,
 }
 TORCH_LIBRARY_IMPL(_, MPS, m) {
-  static const char* enable_mps_fallback = getenv("PYTORCH_ENABLE_MPS_FALLBACK");
-  if (!enable_mps_fallback || std::stoi(enable_mps_fallback) == 0) {
+  static const auto enable_mps_fallback = c10::utils::get_env("PYTORCH_ENABLE_MPS_FALLBACK");
+  if (!enable_mps_fallback || enable_mps_fallback == "0") {
     m.fallback(torch::CppFunction::makeFromBoxedFunction<&mps_error_fallback>());
   } else {
     m.fallback(torch::CppFunction::makeFromBoxedFunction<&mps_fallback>());

View File

@@ -2,6 +2,7 @@
 #include <ATen/mps/MPSProfiler.h>
 #include <c10/util/Exception.h>
+#include <c10/util/env.h>
 #include <fmt/format.h>
 // these need to be literal strings when passed to os_signpost*()
@@ -91,11 +92,11 @@ std::string CopyInfo::buildTensorString(const void* buffer, const OptionalTensor
 MPSProfiler::MPSProfiler() : m_os_log_events(nullptr), m_os_log_intervals(nullptr) {
   // see enum LogOptions for the description.
-  static const char* log_options_str = getenv(kEVLogProfileInfoStr);
-  m_log_options = log_options_str ? strtol(log_options_str, nullptr, 0) : 0;
+  static const auto log_options_str = c10::utils::get_env(kEVLogProfileInfoStr);
+  m_log_options = log_options_str ? strtol(log_options_str->c_str(), nullptr, 0) : 0;
   // see enums profilerOptions and SignpostTypes for the description.
-  static const char* trace_signpost_str = getenv(kEVTraceSignpostsStr);
-  uint32_t trace_signposts = trace_signpost_str ? strtol(trace_signpost_str, nullptr, 0) : 0;
+  static const auto trace_signpost_str = c10::utils::get_env(kEVTraceSignpostsStr);
+  uint32_t trace_signposts = trace_signpost_str ? strtol(trace_signpost_str->c_str(), nullptr, 0) : 0;
   TORCH_CHECK(m_log_options <= LogOptions::LOG_COUNT,
       "invalid log options ",

View File

@@ -258,12 +258,12 @@ static int getLRUCacheLimit() {
   // 0 is used to indicate no limit
   // negative values are used to indicate no caching
   static int limit = [&] {
-    const char* val = getenv("TORCH_CUDNN_V8_API_LRU_CACHE_LIMIT");
+    const auto val = c10::utils::get_env("TORCH_CUDNN_V8_API_LRU_CACHE_LIMIT");
     if (!val) {
       return DEFAULT_LIMIT;
     }
     try {
-      return std::stoi(val);
+      return std::stoi(val.value());
     } catch (std::invalid_argument const&) {
       TORCH_WARN(
           "invalid TORCH_CUDNN_V8_API_LRU_CACHE_LIMIT,",

View File

@@ -21,6 +21,7 @@
 #include <ATen/ops/scalar_tensor.h>
 #endif
+#include <c10/util/env.h>
 #include <mach-o/dyld.h>
 #include <mach-o/getsect.h>
@@ -854,8 +855,8 @@ id<MTLLibrary> MetalShaderLibrary::getLibrary(const std::initializer_list<std::s
 id<MTLLibrary> MetalShaderLibrary::compileLibrary(const std::string& src) {
   static auto fast_math = []() {
-    auto val = std::getenv("PYTORCH_MPS_FAST_MATH");
-    return val && std::stoi(val) != 0;
+    auto const val = c10::utils::get_env("PYTORCH_MPS_FAST_MATH");
+    return val.has_value() && val != "0";
   }();
   NSError* error = nil;
   MTLCompileOptions* options = compile_options;

View File

@@ -34,6 +34,7 @@
 #include <ATen/ops/triangular_solve_native.h>
 #endif
+#include <c10/util/env.h>
 #include <algorithm>
 namespace at::native {
@@ -129,7 +130,7 @@ std::tuple<MPSGraphTensor*, MPSGraphTensor*, MPSGraphTensor*> do_mm(MPSGraph* gr
 }
 bool use_metal_mm(const Tensor& self, const Tensor& other, const Tensor& output) {
-  static bool always_use_metal = std::getenv("PYTORCH_MPS_PREFER_METAL") != nullptr;
+  static bool always_use_metal = c10::utils::has_env("PYTORCH_MPS_PREFER_METAL");
   constexpr auto max_stride_size = 32768;
   static bool is_macos_14_4_or_newer = is_macos_13_or_newer(MacOSVersion::MACOS_VER_14_4_PLUS);
   if (always_use_metal || c10::isIntegralType(self.scalar_type(), true)) {

View File

@@ -7,11 +7,14 @@
 namespace c10::utils {
-static std::shared_mutex env_mutex;
+static std::shared_mutex& get_env_mutex() {
+  static std::shared_mutex env_mutex;
+  return env_mutex;
+}
 // Set an environment variable.
 void set_env(const char* name, const char* value, bool overwrite) {
-  std::lock_guard lk(env_mutex);
+  std::lock_guard lk(get_env_mutex());
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable : 4996)
@@ -46,7 +49,7 @@ void set_env(const char* name, const char* value, bool overwrite) {
 // Reads an environment variable and returns the content if it is set
 std::optional<std::string> get_env(const char* name) noexcept {
-  std::shared_lock lk(env_mutex);
+  std::shared_lock lk(get_env_mutex());
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable : 4996)
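
The likely motivation for wrapping the mutex in a function here is the construct-on-first-use idiom: a function-local static is initialized the first time control reaches it, and C++11 guarantees that initialization is thread-safe, so get_env/set_env remain usable even when called from another translation unit's static initializers, where a namespace-scope env_mutex might not yet be constructed. A reduced sketch of the idiom together with the reader/writer locking used above (function names follow the hunk; the reader/writer helpers are hypothetical):

#include <mutex>
#include <shared_mutex>

static std::shared_mutex& get_env_mutex() {
  // Constructed on first call; initialization is thread-safe per C++11.
  static std::shared_mutex m;
  return m;
}

void reader() {
  std::shared_lock lk(get_env_mutex());  // many readers may hold this
  // ... call getenv(...) ...
}

void writer() {
  std::lock_guard lk(get_env_mutex());   // exclusive, for setenv(...)
  // ... call setenv(...) ...
}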

View File

@@ -289,8 +289,10 @@ void Engine::stop() {
   stopped_ = true;
   // Under some conditions, autograd threads can hang on shutdown
   // Do not wait for them to shutdown indefinitely but rely on timeout
-  auto wait_duration_str = getenv("TORCH_AUTOGRAD_SHUTDOWN_WAIT_LIMIT");
-  auto wait_duration = wait_duration_str ? std::atof(wait_duration_str) : 10.0;
+  auto wait_duration_str =
+      c10::utils::get_env("TORCH_AUTOGRAD_SHUTDOWN_WAIT_LIMIT");
+  auto wait_duration =
+      wait_duration_str ? std::atof(wait_duration_str->c_str()) : 10.0;
   bool noBackward = true;
   for (auto& queue : device_ready_queues_) {
     noBackward = noBackward && queue->empty();

View File

@@ -173,10 +173,10 @@ bool nccl_use_nonblocking() {
 static int nccl_nonblocking_timeout() {
   static int timeout = -2; // -2 means not initialized
   if (timeout == -2) {
-    const char* val = getenv("TORCH_NCCL_NONBLOCKING_TIMEOUT");
-    if (val && strlen(val) > 0) {
+    const auto val = c10::utils::get_env("TORCH_NCCL_NONBLOCKING_TIMEOUT");
+    if (val && !val.value().empty()) {
       // NOLINTNEXTLINE(*-narrowing-conversions)
-      timeout = strtol(val, nullptr, 0);
+      timeout = strtol(val->c_str(), nullptr, 0);
     } else {
       // Default value consistent with kBackendDefaultTimeout
       timeout = 30 * 60;

View File

@@ -6,6 +6,7 @@
 #include <ATen/cuda/CUDAContext.h>
 #include <c10/cuda/CUDACachingAllocator.h>
 #include <c10/cuda/CUDAGuard.h>
+#include <c10/util/env.h>
 #include <c10/util/error.h>
 #if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED)
@@ -168,10 +169,10 @@ class IpcChannel {
  private:
   static std::string get_socket_name(int pid) {
-    const char* tmp_dir = "/tmp";
+    std::string tmp_dir = "/tmp";
     for (const char* env_var : {"TMPDIR", "TMP", "TEMP", "TEMPDIR"}) {
-      if (const char* path = getenv(env_var)) {
-        tmp_dir = path;
+      if (const auto path = c10::utils::get_env(env_var)) {
+        tmp_dir = path.value();
         break;
       }
     }

View File

@@ -2987,9 +2987,10 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
   bool lazyInit = ::c10d::getDefaultGlooLazyInit();
   // Use interfaces listed in "GLOO_SOCKET_IFNAME", if set.
-  char* ifnameEnv = getenv(GLOO_SOCKET_IFNAME_ENV.c_str());
-  if (ifnameEnv && strlen(ifnameEnv) > 1) {
-    for (const auto& iface : ::c10d::split(',', ifnameEnv)) {
+  auto ifnameEnv =
+      c10::utils::get_env(GLOO_SOCKET_IFNAME_ENV.c_str());
+  if (ifnameEnv && ifnameEnv->size() > 1) {
+    for (const auto& iface : ::c10d::split(',', ifnameEnv->c_str())) {
       options->devices.push_back(
           ::c10d::ProcessGroupGloo::createDeviceForInterface(
               iface, lazyInit));

View File

@@ -68,8 +68,8 @@ size_t nCompiledKernels() {
 int debugFuser() {
   if (debug_fusion < 0) {
-    const char* debug_env = getenv("PYTORCH_FUSION_DEBUG");
-    debug_fusion = debug_env ? atoi(debug_env) : 0;
+    const auto debug_env = c10::utils::get_env("PYTORCH_FUSION_DEBUG");
+    debug_fusion = debug_env ? atoi(debug_env->c_str()) : 0;
   }
   return debug_fusion;
 }
}

View File

@@ -3,6 +3,7 @@
 #include <ATen/DynamicLibrary.h>
 #include <ATen/code_template.h>
 #include <c10/util/Exception.h>
+#include <c10/util/env.h>
 #include <torch/csrc/jit/codegen/fuser/compiler.h>
 #include <torch/csrc/jit/codegen/fuser/cpu/temp_file.h>
 #include <optional>
@@ -171,9 +172,9 @@ static bool programExists(const std::string& program) {
 // of compilation attempts.
 struct CompilerConfig {
   CompilerConfig() {
-    const char* cxx_env = getenv("CXX");
-    if (cxx_env != nullptr) {
-      cxx = cxx_env;
+    const auto cxx_env = c10::utils::get_env("CXX");
+    if (cxx_env) {
+      cxx = cxx_env.value();
     }
 #ifdef _MSC_VER