[Clang-tidy header][24/N] Fix clang-tidy warnings on c10/cuda/*.{cpp,h} (#120781)

This PR begins cleaning up the clang-tidy warnings in the code under c10/cuda/.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/120781
Approved by: https://github.com/ezyang
Author: cyy
Date: 2024-03-15 05:03:22 +00:00
Committed by: PyTorch MergeBot
Parent: e4fda049c2
Commit: fb10e13000

6 changed files with 54 additions and 39 deletions

c10/cuda/CUDAAllocatorConfig.cpp

@@ -67,7 +67,7 @@ void CUDAAllocatorConfig::consumeToken(
     size_t i,
     const char c) {
   TORCH_CHECK(
-      i < config.size() && config[i].compare(std::string(1, c)) == 0,
+      i < config.size() && config[i] == std::string(1, c),
       "Error parsing CachingAllocator settings, expected ",
       c,
       "");
@@ -77,15 +77,16 @@ size_t CUDAAllocatorConfig::parseMaxSplitSize(
     const std::vector<std::string>& config,
     size_t i) {
   consumeToken(config, ++i, ':');
+  constexpr int mb = 1024 * 1024;
   if (++i < config.size()) {
     size_t val1 = stoi(config[i]);
     TORCH_CHECK(
-        val1 > kLargeBuffer / (1024 * 1024),
+        val1 > kLargeBuffer / mb,
         "CachingAllocator option max_split_size_mb too small, must be > ",
-        kLargeBuffer / (1024 * 1024),
+        kLargeBuffer / mb,
         "");
-    val1 = std::max(val1, kLargeBuffer / (1024 * 1024));
-    val1 = std::min(val1, (std::numeric_limits<size_t>::max() / (1024 * 1024)));
+    val1 = std::max(val1, kLargeBuffer / mb);
+    val1 = std::min(val1, (std::numeric_limits<size_t>::max() / mb));
     m_max_split_size = val1 * 1024 * 1024;
   } else {
     TORCH_CHECK(false, "Error, expecting max_split_size_mb value", "");
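
The new `mb` constant also makes the two clamps easier to read: the lower clamp is a defensive restatement of the TORCH_CHECK above it, and the upper clamp guarantees the later `val1 * mb` multiplication cannot overflow size_t. A standalone sketch of the same arithmetic; the 20 MiB value for `kLargeBuffer` is our assumption about c10's constant, not something stated in this diff:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>

    int main() {
      constexpr size_t kLargeBuffer = 20971520; // 20 MiB, assumed value
      constexpr size_t mb = 1024 * 1024;
      size_t val1 = 5000; // user passed max_split_size_mb:5000
      val1 = std::max(val1, kLargeBuffer / mb); // lower bound: 20 (MB)
      // Upper bound keeps the final multiplication from overflowing.
      val1 = std::min(val1, std::numeric_limits<size_t>::max() / mb);
      std::cout << val1 * mb << " bytes\n"; // 5242880000
      return 0;
    }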
@@ -118,9 +119,9 @@ size_t CUDAAllocatorConfig::parseRoundUpPower2Divisions(
   bool first_value = true;
   if (++i < config.size()) {
-    if (config[i].compare("[") == 0) {
+    if (std::string_view(config[i]) == "[") {
       size_t last_index = 0;
-      while (++i < config.size() && config[i].compare("]") != 0) {
+      while (++i < config.size() && std::string_view(config[i]) != "]") {
         const std::string& val1 = config[i];
         size_t val2 = 0;
@@ -136,7 +137,7 @@ size_t CUDAAllocatorConfig::parseRoundUpPower2Divisions(
             "For roundups, the divisons has to be power of 2 ",
             "");
-        if (val1.compare(">") == 0) {
+        if (std::string_view(val1) == ">") {
           std::fill(
               std::next(
                   m_roundup_power2_divisions.begin(),
@@ -171,7 +172,7 @@ size_t CUDAAllocatorConfig::parseRoundUpPower2Divisions(
         last_index = index;
       }
-      if (config[i + 1].compare("]") != 0) {
+      if (std::string_view(config[i + 1]) != "]") {
         consumeToken(config, ++i, ',');
       }
     }
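
The `std::string_view` spelling silences the same readability-string-compare diagnostic while keeping the comparison allocation-free: a view is a non-owning window over the string's existing buffer. A minimal sketch (ours, not from the patch):

    #include <cassert>
    #include <string>
    #include <string_view>

    int main() {
      std::string tok = "[";
      std::string_view view(tok); // non-owning, no copy
      assert(view == "[");        // compares length first, then characters
      assert(view != "]");
      return 0;
    }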
@@ -253,51 +254,61 @@ void CUDAAllocatorConfig::parseArgs(const char* env) {
   lexArgs(env, config);
   for (size_t i = 0; i < config.size(); i++) {
-    if (config[i].compare("max_split_size_mb") == 0) {
+    std::string_view config_item_view(config[i]);
+    if (config_item_view == "max_split_size_mb") {
       i = parseMaxSplitSize(config, i);
       used_native_specific_option = true;
-    } else if (config[i].compare("garbage_collection_threshold") == 0) {
+    } else if (config_item_view == "garbage_collection_threshold") {
       i = parseGarbageCollectionThreshold(config, i);
       used_native_specific_option = true;
-    } else if (config[i].compare("roundup_power2_divisions") == 0) {
+    } else if (config_item_view == "roundup_power2_divisions") {
       i = parseRoundUpPower2Divisions(config, i);
       used_native_specific_option = true;
-    } else if (config[i].compare("backend") == 0) {
+    } else if (config_item_view == "backend") {
       i = parseAllocatorConfig(config, i, used_cudaMallocAsync);
-    } else if (config[i] == "expandable_segments") {
+    } else if (config_item_view == "expandable_segments") {
       used_native_specific_option = true;
       consumeToken(config, ++i, ':');
       ++i;
       TORCH_CHECK(
-          i < config.size() && (config[i] == "True" || config[i] == "False"),
+          i < config.size() &&
+              (std::string_view(config[i]) == "True" ||
+               std::string_view(config[i]) == "False"),
           "Expected a single True/False argument for expandable_segments");
-      m_expandable_segments = (config[i] == "True");
+      config_item_view = config[i];
+      m_expandable_segments = (config_item_view == "True");
     } else if (
         // ROCm build's hipify step will change "cuda" to "hip", but for ease of
         // use, accept both. We must break up the string to prevent hipify here.
-        config[i].compare("release_lock_on_hipmalloc") == 0 ||
-        config[i].compare("release_lock_on_c"
-                          "udamalloc") == 0) {
+        config_item_view == "release_lock_on_hipmalloc" ||
+        config_item_view ==
+            "release_lock_on_c"
+            "udamalloc") {
       used_native_specific_option = true;
       consumeToken(config, ++i, ':');
       ++i;
       TORCH_CHECK(
-          i < config.size() && (config[i] == "True" || config[i] == "False"),
+          i < config.size() &&
+              (std::string_view(config[i]) == "True" ||
+               std::string_view(config[i]) == "False"),
           "Expected a single True/False argument for release_lock_on_cudamalloc");
-      m_release_lock_on_cudamalloc = (config[i] == "True");
+      config_item_view = config[i];
+      m_release_lock_on_cudamalloc = (config_item_view == "True");
     } else if (
         // ROCm build's hipify step will change "cuda" to "hip", but for ease of
         // use, accept both. We must break up the string to prevent hipify here.
-        config[i].compare("pinned_use_hip_host_register") == 0 ||
-        config[i].compare("pinned_use_c"
-                          "uda_host_register") == 0) {
+        config_item_view == "pinned_use_hip_host_register" ||
+        config_item_view ==
+            "pinned_use_c"
+            "uda_host_register") {
       i = parsePinnedUseCudaHostRegister(config, i);
       used_native_specific_option = true;
-    } else if (config[i].compare("pinned_num_register_threads") == 0) {
+    } else if (config_item_view == "pinned_num_register_threads") {
      i = parsePinnedNumRegisterThreads(config, i);
       used_native_specific_option = true;
     } else {
-      TORCH_CHECK(false, "Unrecognized CachingAllocator option: ", config[i]);
+      TORCH_CHECK(
+          false, "Unrecognized CachingAllocator option: ", config_item_view);
     }
     if (i + 1 < config.size()) {
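
The split string literals above are deliberate: ROCm's hipify pass textually rewrites "cuda" to "hip", and adjacent string literals are pasted together during translation, so the full option name never appears as a single "cuda..." token in the source. Our illustration of the trick (safe to write whole here, outside the hipified tree):

    #include <cassert>
    #include <string_view>

    int main() {
      // The compiler concatenates adjacent literals, so the spliced
      // spelling compares equal to the full option name.
      constexpr std::string_view spliced =
          "release_lock_on_c"
          "udamalloc";
      assert(spliced == "release_lock_on_cudamalloc");
      return 0;
    }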

c10/cuda/CUDAAllocatorConfig.h

@@ -2,13 +2,13 @@
 #include <c10/cuda/CUDAMacros.h>
 #include <c10/util/Exception.h>
-#include <cuda_runtime_api.h>
 
 #include <atomic>
 #include <cstddef>
+#include <cstdlib>
 #include <mutex>
 #include <string>
 #include <vector>
 
 namespace c10::cuda::CUDACachingAllocator {

c10/cuda/CUDAGuard.h

@@ -6,8 +6,6 @@
 #include <c10/cuda/CUDAMacros.h>
 #include <c10/cuda/impl/CUDAGuardImpl.h>
 
-#include <cstddef>
-
 namespace c10::cuda {
 
 // This code is kind of boilerplatey. See Note [Whither the DeviceGuard

c10/cuda/CUDAMiscFunctions.cpp

@@ -1,5 +1,5 @@
 #include <c10/cuda/CUDAMiscFunctions.h>
-#include <stdlib.h>
+#include <cstdlib>
 
 namespace c10::cuda {
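
This is the modernize-deprecated-headers fix: the C++ header `<cstdlib>` guarantees the declarations in namespace std (the global-namespace aliases from `<stdlib.h>` are only optional), so std-qualified calls become portable. A minimal sketch (ours):

    #include <cstdlib>
    #include <iostream>

    int main() {
      // std::getenv is guaranteed by <cstdlib>.
      if (const char* home = std::getenv("HOME")) {
        std::cout << home << '\n';
      }
      return 0;
    }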

c10/cuda/CUDAStream.cpp

@@ -6,6 +6,7 @@
 #include <c10/util/Exception.h>
 #include <c10/util/irange.h>
 
+#include <array>
 #include <atomic>
 #include <cstdint>
@@ -38,14 +39,19 @@ static int max_stream_priorities;
 // the destruction.
 #if !defined(USE_ROCM)
 // CUDA-only: used to initializes the stream pools (once)
-static c10::once_flag device_flags[C10_COMPILE_TIME_MAX_GPUS];
+static std::array<c10::once_flag, C10_COMPILE_TIME_MAX_GPUS> device_flags;
 #endif
-static std::atomic<uint32_t>
-    priority_counters[c10::cuda::max_compile_time_stream_priorities]
-                     [C10_COMPILE_TIME_MAX_GPUS];
+static std::array<
+    std::array<std::atomic<uint32_t>, C10_COMPILE_TIME_MAX_GPUS>,
+    c10::cuda::max_compile_time_stream_priorities>
+    priority_counters;
-static cudaStream_t streams[c10::cuda::max_compile_time_stream_priorities]
-                           [C10_COMPILE_TIME_MAX_GPUS][kStreamsPerPool];
+static std::array<
+    std::array<
+        std::array<cudaStream_t, kStreamsPerPool>,
+        C10_COMPILE_TIME_MAX_GPUS>,
+    c10::cuda::max_compile_time_stream_priorities>
+    streams;
 #ifdef USE_ROCM
 static c10::once_flag
     stream_flags[c10::cuda::max_compile_time_stream_priorities]
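
The std::array rewrite addresses the *-avoid-c-arrays checks. `std::array<std::array<T, inner>, outer>` mirrors `T[outer][inner]`: same layout, same indexing, zero overhead, but a real type that can be copied, compared, and bounds-checked. Our side-by-side with made-up dimensions:

    #include <array>
    #include <cstdint>

    constexpr int kPriorities = 4;
    constexpr int kMaxGpus = 16;

    // Before: a builtin 2-D array.
    static uint32_t counters_c[kPriorities][kMaxGpus];

    // After: nested std::array with the same layout.
    static std::array<std::array<uint32_t, kMaxGpus>, kPriorities> counters;

    int main() {
      counters_c[1][2] = 7;
      counters[1][2] = 7; // identical indexing syntax
      return counters[1][2] == counters_c[1][2] ? 0 : 1;
    }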
@@ -159,6 +165,7 @@ StreamId makeStreamId(StreamIdType st, size_t si) {
 }
 
 // Thread-local current streams
+// NOLINTNEXTLINE(*-arrays)
 static thread_local std::unique_ptr<StreamId[]> current_streams = nullptr;
 
 // Populates global values.
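
Where the array cannot become a std::array (here the element count is only known at runtime), the warning is suppressed instead of rewritten: `*-arrays` is a clang-tidy glob that matches both modernize-avoid-c-arrays and cppcoreguidelines-avoid-c-arrays. Our illustration of the pattern:

    #include <memory>

    std::unique_ptr<int[]> make_buffer(int n) {
      // Size is a runtime value, so std::array does not apply here.
      // NOLINTNEXTLINE(*-arrays)
      return std::make_unique<int[]>(n);
    }

    int main() {
      auto buf = make_buffer(8);
      buf[0] = 1;
      return buf[0] - 1;
    }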
@@ -227,6 +234,7 @@ static void initCUDAStreamsOnce() {
   }
 
   // Inits current streams (thread local) to default streams
+  // NOLINTNEXTLINE(*-arrays)
   current_streams = std::make_unique<StreamId[]>(num_gpus);
   for (const auto i : c10::irange(num_gpus)) {
     current_streams[i] = makeStreamId(StreamIdType::DEFAULT, 0);
@@ -274,6 +282,7 @@ cudaStream_t CUDAStream::stream() const {
         " official API like c10::cuda::getStreamFromPool() to get a new stream.");
     return nullptr;
   } else if (st.isExt()) {
+    // NOLINTNEXTLINE(performance-no-int-to-ptr)
     return reinterpret_cast<cudaStream_t>(stream_id);
   } else {
     auto streamType = st.getStreamType();
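
performance-no-int-to-ptr flags integer-to-pointer casts because they defeat alias analysis; here the cast is the whole point, since an externally supplied stream handle was stashed in the integer StreamId, so the check is suppressed rather than the code restructured. Our sketch of the round-trip pattern:

    #include <cassert>
    #include <cstdint>

    int main() {
      int object = 42;
      // Pointer stored as an opaque integer id.
      auto id = reinterpret_cast<std::uintptr_t>(&object);
      // Cast back is intentional, so the diagnostic is silenced.
      // NOLINTNEXTLINE(performance-no-int-to-ptr)
      auto* back = reinterpret_cast<int*>(id);
      assert(back == &object);
      return 0;
    }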

c10/cuda/CUDAStream.h

@@ -1,8 +1,5 @@
 #pragma once
 
-#include <cstdint>
-#include <utility>
-
 #include <cuda_runtime_api.h>
 
 #include <c10/core/DeviceGuard.h>