Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 12:54:11 +08:00)
[Clang-tidy header][24/N] Fix clang-tidy warnings on c10/cuda/*.{cpp,h} (#120781)
This PR begins cleaning up clang-tidy warnings in the code under c10/cuda.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120781
Approved by: https://github.com/ezyang
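The changes below fall into a few recurring clang-tidy patterns: replacing std::string::compare(...) == 0 with direct == comparisons (often through std::string_view), hoisting magic numbers into named constants, swapping C-style arrays for std::array, modernizing includes, and suppressing intentional violations with NOLINTNEXTLINE. As a minimal standalone sketch of the string-comparison pattern (illustrative only, not code from this commit):

    #include <iostream>
    #include <string>
    #include <string_view>

    int main() {
      const std::string token = "max_split_size_mb";

      // Old style, flagged by clang-tidy's readability-string-compare:
      if (token.compare("max_split_size_mb") == 0) {
        std::cout << "matched via compare()\n";
      }

      // New style: operator== through a string_view, which compares
      // against the literal without allocating a temporary std::string.
      if (std::string_view(token) == "max_split_size_mb") {
        std::cout << "matched via operator==\n";
      }
      return 0;
    }
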
@@ -67,7 +67,7 @@ void CUDAAllocatorConfig::consumeToken(
     size_t i,
     const char c) {
   TORCH_CHECK(
-      i < config.size() && config[i].compare(std::string(1, c)) == 0,
+      i < config.size() && config[i] == std::string(1, c),
       "Error parsing CachingAllocator settings, expected ",
       c,
       "");

@@ -77,15 +77,16 @@ size_t CUDAAllocatorConfig::parseMaxSplitSize(
     const std::vector<std::string>& config,
     size_t i) {
   consumeToken(config, ++i, ':');
+  constexpr int mb = 1024 * 1024;
   if (++i < config.size()) {
     size_t val1 = stoi(config[i]);
     TORCH_CHECK(
-        val1 > kLargeBuffer / (1024 * 1024),
+        val1 > kLargeBuffer / mb,
         "CachingAllocator option max_split_size_mb too small, must be > ",
-        kLargeBuffer / (1024 * 1024),
+        kLargeBuffer / mb,
         "");
-    val1 = std::max(val1, kLargeBuffer / (1024 * 1024));
-    val1 = std::min(val1, (std::numeric_limits<size_t>::max() / (1024 * 1024)));
+    val1 = std::max(val1, kLargeBuffer / mb);
+    val1 = std::min(val1, (std::numeric_limits<size_t>::max() / mb));
     m_max_split_size = val1 * 1024 * 1024;
   } else {
     TORCH_CHECK(false, "Error, expecting max_split_size_mb value", "");

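The hunk above mainly replaces the repeated magic number 1024 * 1024 with the named constant mb. A minimal sketch of the resulting clamping logic, with a hypothetical kLargeBuffer value standing in for the real constant:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>

    int main() {
      constexpr size_t kLargeBuffer = 20971520; // hypothetical stand-in value
      constexpr size_t mb = 1024 * 1024;

      size_t val1 = 512; // value parsed from, e.g., "max_split_size_mb:512"
      // Clamp to [kLargeBuffer / mb, SIZE_MAX / mb] so the final
      // multiplication by 1 MiB cannot overflow size_t.
      val1 = std::max(val1, kLargeBuffer / mb);
      val1 = std::min(val1, std::numeric_limits<size_t>::max() / mb);
      std::cout << "max split size in bytes: " << val1 * mb << '\n';
      return 0;
    }
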
@@ -118,9 +119,9 @@ size_t CUDAAllocatorConfig::parseRoundUpPower2Divisions(
   bool first_value = true;

   if (++i < config.size()) {
-    if (config[i].compare("[") == 0) {
+    if (std::string_view(config[i]) == "[") {
       size_t last_index = 0;
-      while (++i < config.size() && config[i].compare("]") != 0) {
+      while (++i < config.size() && std::string_view(config[i]) != "]") {
         const std::string& val1 = config[i];
         size_t val2 = 0;

@@ -136,7 +137,7 @@ size_t CUDAAllocatorConfig::parseRoundUpPower2Divisions(
             "For roundups, the divisons has to be power of 2 ",
             "");

-        if (val1.compare(">") == 0) {
+        if (std::string_view(val1) == ">") {
           std::fill(
               std::next(
                   m_roundup_power2_divisions.begin(),

@@ -171,7 +172,7 @@ size_t CUDAAllocatorConfig::parseRoundUpPower2Divisions(
           last_index = index;
         }

-        if (config[i + 1].compare("]") != 0) {
+        if (std::string_view(config[i + 1]) != "]") {
           consumeToken(config, ++i, ',');
         }
       }

@@ -253,51 +254,61 @@ void CUDAAllocatorConfig::parseArgs(const char* env) {
   lexArgs(env, config);

   for (size_t i = 0; i < config.size(); i++) {
-    if (config[i].compare("max_split_size_mb") == 0) {
+    std::string_view config_item_view(config[i]);
+    if (config_item_view == "max_split_size_mb") {
       i = parseMaxSplitSize(config, i);
       used_native_specific_option = true;
-    } else if (config[i].compare("garbage_collection_threshold") == 0) {
+    } else if (config_item_view == "garbage_collection_threshold") {
       i = parseGarbageCollectionThreshold(config, i);
       used_native_specific_option = true;
-    } else if (config[i].compare("roundup_power2_divisions") == 0) {
+    } else if (config_item_view == "roundup_power2_divisions") {
       i = parseRoundUpPower2Divisions(config, i);
       used_native_specific_option = true;
-    } else if (config[i].compare("backend") == 0) {
+    } else if (config_item_view == "backend") {
       i = parseAllocatorConfig(config, i, used_cudaMallocAsync);
-    } else if (config[i] == "expandable_segments") {
+    } else if (config_item_view == "expandable_segments") {
       used_native_specific_option = true;
       consumeToken(config, ++i, ':');
       ++i;
       TORCH_CHECK(
-          i < config.size() && (config[i] == "True" || config[i] == "False"),
+          i < config.size() &&
+              (std::string_view(config[i]) == "True" ||
+               std::string_view(config[i]) == "False"),
           "Expected a single True/False argument for expandable_segments");
-      m_expandable_segments = (config[i] == "True");
+      config_item_view = config[i];
+      m_expandable_segments = (config_item_view == "True");
     } else if (
         // ROCm build's hipify step will change "cuda" to "hip", but for ease of
         // use, accept both. We must break up the string to prevent hipify here.
-        config[i].compare("release_lock_on_hipmalloc") == 0 ||
-        config[i].compare("release_lock_on_c"
-                          "udamalloc") == 0) {
+        config_item_view == "release_lock_on_hipmalloc" ||
+        config_item_view ==
+            "release_lock_on_c"
+            "udamalloc") {
       used_native_specific_option = true;
       consumeToken(config, ++i, ':');
       ++i;
       TORCH_CHECK(
-          i < config.size() && (config[i] == "True" || config[i] == "False"),
+          i < config.size() &&
+              (std::string_view(config[i]) == "True" ||
+               std::string_view(config[i]) == "False"),
           "Expected a single True/False argument for release_lock_on_cudamalloc");
-      m_release_lock_on_cudamalloc = (config[i] == "True");
+      config_item_view = config[i];
+      m_release_lock_on_cudamalloc = (config_item_view == "True");
     } else if (
         // ROCm build's hipify step will change "cuda" to "hip", but for ease of
         // use, accept both. We must break up the string to prevent hipify here.
-        config[i].compare("pinned_use_hip_host_register") == 0 ||
-        config[i].compare("pinned_use_c"
-                          "uda_host_register") == 0) {
+        config_item_view == "pinned_use_hip_host_register" ||
+        config_item_view ==
+            "pinned_use_c"
+            "uda_host_register") {
       i = parsePinnedUseCudaHostRegister(config, i);
       used_native_specific_option = true;
-    } else if (config[i].compare("pinned_num_register_threads") == 0) {
+    } else if (config_item_view == "pinned_num_register_threads") {
       i = parsePinnedNumRegisterThreads(config, i);
       used_native_specific_option = true;
     } else {
-      TORCH_CHECK(false, "Unrecognized CachingAllocator option: ", config[i]);
+      TORCH_CHECK(
+          false, "Unrecognized CachingAllocator option: ", config_item_view);
     }

     if (i + 1 < config.size()) {

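The parseArgs rewrite above constructs one std::string_view per loop iteration and compares it against every option name, instead of calling std::string::compare once per candidate. A minimal sketch of the dispatch shape, with made-up option names:

    #include <iostream>
    #include <string>
    #include <string_view>
    #include <vector>

    int main() {
      const std::vector<std::string> config = {"opt_a", ":", "1", "opt_b"};
      for (size_t i = 0; i < config.size(); i++) {
        // One cheap, non-allocating view per token; every branch below
        // compares the same view instead of re-invoking compare().
        std::string_view item(config[i]);
        if (item == "opt_a") {
          std::cout << "handle opt_a\n";
        } else if (item == "opt_b") {
          std::cout << "handle opt_b\n";
        }
      }
      return 0;
    }
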
@@ -2,13 +2,13 @@

 #include <c10/cuda/CUDAMacros.h>
 #include <c10/util/Exception.h>
 #include <cuda_runtime_api.h>

 #include <atomic>
 #include <cstddef>
 #include <cstdlib>
 #include <mutex>
 #include <string>
 #include <vector>

 namespace c10::cuda::CUDACachingAllocator {

@@ -6,8 +6,6 @@
 #include <c10/cuda/CUDAMacros.h>
 #include <c10/cuda/impl/CUDAGuardImpl.h>

-#include <cstddef>
-
 namespace c10::cuda {

 // This code is kind of boilerplatey. See Note [Whither the DeviceGuard

@@ -1,5 +1,5 @@
 #include <c10/cuda/CUDAMiscFunctions.h>
-#include <stdlib.h>
+#include <cstdlib>

 namespace c10::cuda {

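The hunk above (apparently c10/cuda/CUDAMiscFunctions.cpp, given its include) is the classic modernize-deprecated-headers fix: <stdlib.h> becomes <cstdlib>, which places the C library names in namespace std. A minimal sketch of the difference:

    #include <cstdlib> // C++ header: declares std::getenv (and usually ::getenv)
    #include <iostream>

    int main() {
      // With <cstdlib> the portable spelling is std::getenv; relying on the
      // global ::getenv is only guaranteed by the legacy <stdlib.h> header.
      if (const char* path = std::getenv("PATH")) {
        std::cout << "PATH is set\n";
      }
      return 0;
    }
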
@@ -6,6 +6,7 @@
 #include <c10/util/Exception.h>
 #include <c10/util/irange.h>

+#include <array>
 #include <atomic>
 #include <cstdint>

@@ -38,14 +39,19 @@ static int max_stream_priorities;
 // the destruction.
 #if !defined(USE_ROCM)
 // CUDA-only: used to initializes the stream pools (once)
-static c10::once_flag device_flags[C10_COMPILE_TIME_MAX_GPUS];
+static std::array<c10::once_flag, C10_COMPILE_TIME_MAX_GPUS> device_flags;
 #endif
-static std::atomic<uint32_t>
-    priority_counters[c10::cuda::max_compile_time_stream_priorities]
-                     [C10_COMPILE_TIME_MAX_GPUS];
+static std::array<
+    std::array<std::atomic<uint32_t>, C10_COMPILE_TIME_MAX_GPUS>,
+    c10::cuda::max_compile_time_stream_priorities>
+    priority_counters;

-static cudaStream_t streams[c10::cuda::max_compile_time_stream_priorities]
-                           [C10_COMPILE_TIME_MAX_GPUS][kStreamsPerPool];
+static std::array<
+    std::array<
+        std::array<cudaStream_t, kStreamsPerPool>,
+        C10_COMPILE_TIME_MAX_GPUS>,
+    c10::cuda::max_compile_time_stream_priorities>
+    streams;
 #ifdef USE_ROCM
 static c10::once_flag
     stream_flags[c10::cuda::max_compile_time_stream_priorities]

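The hunk above replaces C-style global arrays with std::array (the *-avoid-c-arrays fix). Note that the nesting order inverts relative to the C declaration: the innermost std::array element type corresponds to the rightmost C extent. A minimal sketch with made-up dimensions:

    #include <array>
    #include <atomic>
    #include <cstdint>

    constexpr int kPriorities = 4; // hypothetical stand-ins for the real
    constexpr int kMaxGpus = 16;   // compile-time constants

    // C style: std::uint32_t counters[kPriorities][kMaxGpus];
    // std::array form: the rightmost extent becomes the innermost array.
    static std::array<
        std::array<std::atomic<std::uint32_t>, kMaxGpus>,
        kPriorities>
        counters;

    int main() {
      // Indexing syntax is unchanged: counters[priority][device].
      counters[2][5].fetch_add(1, std::memory_order_relaxed);
      return static_cast<int>(counters[2][5].load());
    }
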
@@ -159,6 +165,7 @@ StreamId makeStreamId(StreamIdType st, size_t si) {
 }

 // Thread-local current streams
+// NOLINTNEXTLINE(*-arrays)
 static thread_local std::unique_ptr<StreamId[]> current_streams = nullptr;

 // Populates global values.

@@ -227,6 +234,7 @@ static void initCUDAStreamsOnce() {
   }

   // Inits current streams (thread local) to default streams
+  // NOLINTNEXTLINE(*-arrays)
   current_streams = std::make_unique<StreamId[]>(num_gpus);
   for (const auto i : c10::irange(num_gpus)) {
     current_streams[i] = makeStreamId(StreamIdType::DEFAULT, 0);

@@ -274,6 +282,7 @@ cudaStream_t CUDAStream::stream() const {
         " official API like c10::cuda::getStreamFromPool() to get a new stream.");
     return nullptr;
   } else if (st.isExt()) {
+    // NOLINTNEXTLINE(performance-no-int-to-ptr)
     return reinterpret_cast<cudaStream_t>(stream_id);
   } else {
     auto streamType = st.getStreamType();

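The three hunks above keep intentional C-array and int-to-pointer uses but silence the corresponding diagnostics with NOLINTNEXTLINE, which suppresses the named clang-tidy check on the following line only; glob patterns such as *-arrays match every check whose name ends in -arrays. A minimal sketch of the mechanism:

    #include <cstdint>
    #include <memory>

    int main() {
      // NOLINTNEXTLINE(modernize-avoid-c-arrays)
      int legacy[4] = {0, 1, 2, 3};

      // unique_ptr<T[]> also trips the *-arrays checks; suppress deliberately.
      // NOLINTNEXTLINE(*-arrays)
      auto ids = std::make_unique<std::int64_t[]>(4);
      ids[0] = legacy[3];
      return static_cast<int>(ids[0]);
    }
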
@@ -1,8 +1,5 @@
 #pragma once

-#include <cstdint>
-#include <utility>
-
 #include <cuda_runtime_api.h>

 #include <c10/core/DeviceGuard.h>