From b51f66c1950a582dd18d1b2ee67df840a8c4dbbe Mon Sep 17 00:00:00 2001 From: cyy Date: Thu, 18 Apr 2024 13:35:48 +0000 Subject: [PATCH] [Environment Variable][1/N] Use thread-safe env variable API in c10 (#119449) This PR is the beginning of attempts to wrap thread-unsafe getenv and set_env functions inside a RW mutex. Pull Request resolved: https://github.com/pytorch/pytorch/pull/119449 Approved by: https://github.com/albanD --- c10/core/impl/alloc_cpu.cpp | 5 +- c10/cuda/CUDAAllocatorConfig.cpp | 8 +- c10/cuda/CUDAAllocatorConfig.h | 6 +- c10/cuda/CUDACachingAllocator.cpp | 9 +- c10/cuda/CUDADeviceAssertionHost.cpp | 5 +- c10/cuda/CUDAMiscFunctions.cpp | 8 +- c10/test/util/DeadlockDetection_test.cpp | 5 +- c10/util/DeadlockDetection.cpp | 5 +- c10/util/Logging.cpp | 10 +-- c10/util/env.cpp | 108 +++++++++++++++++++++++ c10/util/env.h | 46 ++++------ c10/util/tempfile.cpp | 8 +- 12 files changed, 163 insertions(+), 60 deletions(-) create mode 100644 c10/util/env.cpp diff --git a/c10/core/impl/alloc_cpu.cpp b/c10/core/impl/alloc_cpu.cpp index 9b7ae22f9f84..def4c3a3a975 100644 --- a/c10/core/impl/alloc_cpu.cpp +++ b/c10/core/impl/alloc_cpu.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -53,8 +54,8 @@ void memset_junk(void* data, size_t num) { #if defined(__linux__) && !defined(__ANDROID__) static inline bool is_thp_alloc_enabled() { static bool value = [&] { - const char* ptr = std::getenv("THP_MEM_ALLOC_ENABLE"); - return ptr != nullptr ? std::atoi(ptr) : 0; + auto env = c10::utils::check_env("THP_MEM_ALLOC_ENABLE"); + return env.has_value() ? env.value() : 0; }(); return value; } diff --git a/c10/cuda/CUDAAllocatorConfig.cpp b/c10/cuda/CUDAAllocatorConfig.cpp index 1f81ed47b67b..ca38dfd6a4e6 100644 --- a/c10/cuda/CUDAAllocatorConfig.cpp +++ b/c10/cuda/CUDAAllocatorConfig.cpp @@ -234,7 +234,7 @@ size_t CUDAAllocatorConfig::parseAllocatorConfig( return i; } -void CUDAAllocatorConfig::parseArgs(const char* env) { +void CUDAAllocatorConfig::parseArgs(const std::optional& env) { // If empty, set the default values m_max_split_size = std::numeric_limits::max(); m_roundup_power2_divisions.assign(kRoundUpPowerOfTwoIntervals, 0); @@ -242,16 +242,16 @@ void CUDAAllocatorConfig::parseArgs(const char* env) { bool used_cudaMallocAsync = false; bool used_native_specific_option = false; - if (env == nullptr) { + if (!env.has_value()) { return; } { std::lock_guard lock(m_last_allocator_settings_mutex); - m_last_allocator_settings = env; + m_last_allocator_settings = env.value(); } std::vector config; - lexArgs(env, config); + lexArgs(env.value().c_str(), config); for (size_t i = 0; i < config.size(); i++) { std::string_view config_item_view(config[i]); diff --git a/c10/cuda/CUDAAllocatorConfig.h b/c10/cuda/CUDAAllocatorConfig.h index 3106fc1b46ba..db5c9e1c8f5c 100644 --- a/c10/cuda/CUDAAllocatorConfig.h +++ b/c10/cuda/CUDAAllocatorConfig.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -72,14 +73,13 @@ class C10_CUDA_API CUDAAllocatorConfig { static CUDAAllocatorConfig& instance() { static CUDAAllocatorConfig* s_instance = ([]() { auto inst = new CUDAAllocatorConfig(); - const char* env = getenv("PYTORCH_CUDA_ALLOC_CONF"); - inst->parseArgs(env); + inst->parseArgs(c10::utils::get_env("PYTORCH_CUDA_ALLOC_CONF")); return inst; })(); return *s_instance; } - void parseArgs(const char* env); + void parseArgs(const std::optional& env); private: CUDAAllocatorConfig(); diff --git a/c10/cuda/CUDACachingAllocator.cpp b/c10/cuda/CUDACachingAllocator.cpp index c472e82ce2f1..afac5272b625 100644 --- a/c10/cuda/CUDACachingAllocator.cpp +++ b/c10/cuda/CUDACachingAllocator.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -2831,7 +2832,7 @@ class DeviceCachingAllocator { // errors, since the caching allocator foils cuda-memcheck. bool forceUncachedAllocator() { static bool force_uncached = - getenv("PYTORCH_NO_CUDA_MEMORY_CACHING") != nullptr; + c10::utils::has_env("PYTORCH_NO_CUDA_MEMORY_CACHING"); return force_uncached; } @@ -3363,9 +3364,9 @@ struct BackendStaticInitializer { // version checks, to CUDAAllocatorConfig's runtime doublecheck. If this // works, maybe we should move all of CUDAAllocatorConfig here? CUDAAllocator* parseEnvForBackend() { - const char* val = getenv("PYTORCH_CUDA_ALLOC_CONF"); - if (val != nullptr) { - const std::string config(val); + const auto val = c10::utils::get_env("PYTORCH_CUDA_ALLOC_CONF"); + if (val.has_value()) { + const std::string& config = val.value(); std::regex exp("[\\s,]+"); std::sregex_token_iterator it(config.begin(), config.end(), exp, -1); diff --git a/c10/cuda/CUDADeviceAssertionHost.cpp b/c10/cuda/CUDADeviceAssertionHost.cpp index 1d52af781227..ec41e6230fb0 100644 --- a/c10/cuda/CUDADeviceAssertionHost.cpp +++ b/c10/cuda/CUDADeviceAssertionHost.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -80,8 +81,8 @@ bool dsa_check_if_all_devices_support_managed_memory() { } bool env_flag_set(const char* env_var_name) { - const char* const env_string = std::getenv(env_var_name); - return (env_string == nullptr) ? false : std::strcmp(env_string, "0"); + const auto env_flag = c10::utils::check_env(env_var_name); + return env_flag.has_value() && env_flag.value(); } /// Deleter for UVM/managed memory pointers diff --git a/c10/cuda/CUDAMiscFunctions.cpp b/c10/cuda/CUDAMiscFunctions.cpp index 11ea77536662..9ef724813e0e 100644 --- a/c10/cuda/CUDAMiscFunctions.cpp +++ b/c10/cuda/CUDAMiscFunctions.cpp @@ -1,12 +1,14 @@ #include -#include +#include namespace c10::cuda { +// NOLINTNEXTLINE(bugprone-exception-escape,-warnings-as-errors) const char* get_cuda_check_suffix() noexcept { - static char* device_blocking_flag = getenv("CUDA_LAUNCH_BLOCKING"); + static auto device_blocking_flag = + c10::utils::check_env("CUDA_LAUNCH_BLOCKING"); static bool blocking_enabled = - (device_blocking_flag && atoi(device_blocking_flag)); + (device_blocking_flag.has_value() && device_blocking_flag.value()); if (blocking_enabled) { return ""; } else { diff --git a/c10/test/util/DeadlockDetection_test.cpp b/c10/test/util/DeadlockDetection_test.cpp index 35c4953f6d33..05ae154e224a 100644 --- a/c10/test/util/DeadlockDetection_test.cpp +++ b/c10/test/util/DeadlockDetection_test.cpp @@ -1,9 +1,8 @@ #include +#include #include -#include - using namespace ::testing; using namespace c10::impl; @@ -23,7 +22,7 @@ TEST(DeadlockDetection, basic) { #ifndef _WIN32 TEST(DeadlockDetection, disable) { - setenv("TORCH_DISABLE_DEADLOCK_DETECTION", "1", 1); + c10::utils::set_env("TORCH_DISABLE_DEADLOCK_DETECTION", "1"); DummyPythonGILHooks hooks; SetPythonGILHooks(&hooks); SetPythonGILHooks(&hooks); diff --git a/c10/util/DeadlockDetection.cpp b/c10/util/DeadlockDetection.cpp index 320fa7873c6f..4b00d24534a8 100644 --- a/c10/util/DeadlockDetection.cpp +++ b/c10/util/DeadlockDetection.cpp @@ -1,6 +1,5 @@ #include - -#include +#include namespace c10::impl { @@ -8,7 +7,7 @@ namespace { PythonGILHooks* python_gil_hooks = nullptr; bool disable_detection() { - return std::getenv("TORCH_DISABLE_DEADLOCK_DETECTION") != nullptr; + return c10::utils::has_env("TORCH_DISABLE_DEADLOCK_DETECTION"); } } // namespace diff --git a/c10/util/Logging.cpp b/c10/util/Logging.cpp index e9c9e9c2f306..17459f69fafc 100644 --- a/c10/util/Logging.cpp +++ b/c10/util/Logging.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef FBCODE_CAFFE2 #include #endif @@ -10,7 +11,6 @@ #endif #include -#include #include // Common code that we use regardless of whether we use glog or not. @@ -94,8 +94,8 @@ using DDPUsageLoggerType = std::function; namespace { bool IsAPIUsageDebugMode() { - const char* val = getenv("PYTORCH_API_USAGE_STDERR"); - return val && *val; // any non-empty value + auto val = c10::utils::get_env("PYTORCH_API_USAGE_STDERR"); + return val.has_value() && !val.value().empty(); // any non-empty value } void APIUsageDebug(const string& event) { @@ -438,10 +438,10 @@ namespace c10::detail { namespace { void setLogLevelFlagFromEnv() { - const char* level_str = std::getenv("TORCH_CPP_LOG_LEVEL"); + auto level_env = c10::utils::get_env("TORCH_CPP_LOG_LEVEL"); // Not set, fallback to the default level (i.e. WARNING). - std::string level{level_str != nullptr ? level_str : ""}; + std::string level{level_env.has_value() ? level_env.value() : ""}; if (level.empty()) { return; } diff --git a/c10/util/env.cpp b/c10/util/env.cpp new file mode 100644 index 000000000000..865c6b949706 --- /dev/null +++ b/c10/util/env.cpp @@ -0,0 +1,108 @@ +#include +#include +#include +#include +#include + +namespace c10::utils { + +static std::shared_mutex env_mutex; + +// Set an environment variable. +void set_env(const char* name, const char* value, bool overwrite) { + std::lock_guard lk(env_mutex); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4996) +#endif +#ifdef _MSC_VER + if (!overwrite) { + // NOLINTNEXTLINE(concurrency-mt-unsafe) + if (std::getenv(name) != nullptr) { + return; + } + } + auto full_env_variable = fmt::format("{}={}", name, value); + // NOLINTNEXTLINE(concurrency-mt-unsafe) + auto err = putenv(full_env_variable.c_str()); + TORCH_INTERNAL_ASSERT( + err == 0, + "putenv failed for environment \"", + name, + "\", the error is: ", + err); +#else + // NOLINTNEXTLINE(concurrency-mt-unsafe) + auto err = setenv(name, value, static_cast(overwrite)); + TORCH_INTERNAL_ASSERT( + err == 0, + "setenv failed for environment \"", + name, + "\", the error is: ", + err); +#endif +#ifdef _MSC_VER +#pragma warning(pop) +#endif + return; +} + +// Checks an environment variable is set. +bool has_env(const char* name) noexcept { + std::shared_lock lk(env_mutex); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + // NOLINTNEXTLINE(concurrency-mt-unsafe) + auto envar = std::getenv(name); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + return envar != nullptr; +} + +// Reads an environment variable and returns the content if it is set +std::optional get_env(const char* name) noexcept { + std::shared_lock lk(env_mutex); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + // NOLINTNEXTLINE(concurrency-mt-unsafe) + auto envar = std::getenv(name); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + if (envar != nullptr) { + return std::string(envar); + } + return std::nullopt; +} + +// Reads an environment variable and returns +// - optional, if set equal to "1" +// - optional, if set equal to "0" +// - nullopt, otherwise +// +// NB: +// Issues a warning if the value of the environment variable is not 0 or 1. +std::optional check_env(const char* name) { + auto env_opt = get_env(name); + if (env_opt.has_value()) { + if (*env_opt == "0") { + return false; + } + if (*env_opt == "1") { + return true; + } + TORCH_WARN( + "Ignoring invalid value for boolean flag ", + name, + ": ", + *env_opt, + "valid values are 0 or 1."); + } + return std::nullopt; +} +} // namespace c10::utils diff --git a/c10/util/env.h b/c10/util/env.h index 3db116c7db7a..04b758586138 100644 --- a/c10/util/env.h +++ b/c10/util/env.h @@ -1,11 +1,20 @@ #pragma once -#include -#include -#include +#include #include +#include namespace c10::utils { + +// Set an environment variable. +C10_API void set_env( + const char* name, + const char* value, + bool overwrite = true); + +// Checks an environment variable is set. +C10_API bool has_env(const char* name) noexcept; + // Reads an environment variable and returns // - optional, if set equal to "1" // - optional, if set equal to "0" @@ -13,29 +22,10 @@ namespace c10::utils { // // NB: // Issues a warning if the value of the environment variable is not 0 or 1. -inline std::optional check_env(const char* name) { -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -#endif - auto envar = std::getenv(name); -#ifdef _MSC_VER -#pragma warning(pop) -#endif - if (envar) { - if (strcmp(envar, "0") == 0) { - return false; - } - if (strcmp(envar, "1") == 0) { - return true; - } - TORCH_WARN( - "Ignoring invalid value for boolean flag ", - name, - ": ", - envar, - "valid values are 0 or 1."); - } - return std::nullopt; -} +C10_API std::optional check_env(const char* name); + +// Reads the value of an environment variable if it is set. +// However, check_env should be used if the value is assumed to be a flag. +C10_API std::optional get_env(const char* name) noexcept; + } // namespace c10::utils diff --git a/c10/util/tempfile.cpp b/c10/util/tempfile.cpp index 28c3c7f14fd0..f106885a88b6 100644 --- a/c10/util/tempfile.cpp +++ b/c10/util/tempfile.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -22,10 +23,11 @@ static std::string make_filename(std::string_view name_prefix) { // We see if any of these environment variables is set and use their value, or // else default the temporary directory to `/tmp`. - const char* tmp_directory = "/tmp"; + std::string tmp_directory = "/tmp"; for (const char* variable : {"TMPDIR", "TMP", "TEMP", "TEMPDIR"}) { - if (const char* path = getenv(variable)) { - tmp_directory = path; + auto path_opt = c10::utils::get_env(variable); + if (path_opt.has_value()) { + tmp_directory = path_opt.value(); break; } }