mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[Clang-tidy header][15/N] Enable clang-tidy on headers in c10/cuda and c10/mobile (#116602)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/116602 Approved by: https://github.com/ezyang
This commit is contained in:
@ -246,8 +246,7 @@ code = 'CLANGTIDY'
|
|||||||
include_patterns = [
|
include_patterns = [
|
||||||
'aten/src/ATen/core/*.cpp',
|
'aten/src/ATen/core/*.cpp',
|
||||||
'c10/**/*.cpp',
|
'c10/**/*.cpp',
|
||||||
'c10/core/**/*.h',
|
'c10/**/*.h',
|
||||||
'c10/util/**/*.h',
|
|
||||||
# Enable coverage of headers in torch/csrc, excluding sub-directories for now.
|
# Enable coverage of headers in torch/csrc, excluding sub-directories for now.
|
||||||
'torch/csrc/*.h',
|
'torch/csrc/*.h',
|
||||||
'torch/csrc/**/*.cpp',
|
'torch/csrc/**/*.cpp',
|
||||||
@ -258,10 +257,10 @@ exclude_patterns = [
|
|||||||
# CUDA files are also excluded.
|
# CUDA files are also excluded.
|
||||||
'**/fb/**',
|
'**/fb/**',
|
||||||
'**/*pb.h',
|
'**/*pb.h',
|
||||||
'**/*CUDA*',
|
|
||||||
'**/cuda/*pp',
|
'**/cuda/*pp',
|
||||||
'**/*XPU*',
|
'**/*XPU*',
|
||||||
'**/xpu/*pp',
|
'**/xpu/*pp',
|
||||||
|
'c10/cuda/CUDAAlgorithm.h',
|
||||||
'c10/util/complex_math.h',
|
'c10/util/complex_math.h',
|
||||||
'c10/util/complex_utils.h',
|
'c10/util/complex_utils.h',
|
||||||
'c10/util/flat_hash_map.h',
|
'c10/util/flat_hash_map.h',
|
||||||
@ -272,6 +271,7 @@ exclude_patterns = [
|
|||||||
'c10/util/SmallVector.h',
|
'c10/util/SmallVector.h',
|
||||||
'c10/util/win32-headers.h',
|
'c10/util/win32-headers.h',
|
||||||
'c10/util/*inl.h',
|
'c10/util/*inl.h',
|
||||||
|
'c10/test/**/*.h',
|
||||||
'aten/src/ATen/core/TensorImpl_test.cpp',
|
'aten/src/ATen/core/TensorImpl_test.cpp',
|
||||||
'third_party/**/*',
|
'third_party/**/*',
|
||||||
'torch/csrc/api/**',
|
'torch/csrc/api/**',
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
#include <c10/cuda/CUDAAllocatorConfig.h>
|
#include <c10/cuda/CUDAAllocatorConfig.h>
|
||||||
|
#include <c10/cuda/CUDACachingAllocator.h>
|
||||||
|
#include <c10/util/llvmMathExtras.h>
|
||||||
|
|
||||||
#if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED)
|
#if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED)
|
||||||
#include <c10/cuda/driver_api.h>
|
#include <c10/cuda/driver_api.h>
|
||||||
|
@ -1,14 +1,13 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <c10/cuda/CUDACachingAllocator.h>
|
|
||||||
#include <c10/cuda/CUDAException.h>
|
|
||||||
#include <c10/cuda/CUDAMacros.h>
|
#include <c10/cuda/CUDAMacros.h>
|
||||||
#include <c10/util/Exception.h>
|
#include <c10/util/Exception.h>
|
||||||
#include <c10/util/llvmMathExtras.h>
|
|
||||||
#include <cuda_runtime_api.h>
|
#include <cuda_runtime_api.h>
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <vector>
|
#include <cstddef>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace c10::cuda::CUDACachingAllocator {
|
namespace c10::cuda::CUDACachingAllocator {
|
||||||
|
|
||||||
@ -74,8 +73,8 @@ class C10_CUDA_API CUDAAllocatorConfig {
|
|||||||
private:
|
private:
|
||||||
CUDAAllocatorConfig();
|
CUDAAllocatorConfig();
|
||||||
|
|
||||||
void lexArgs(const char* env, std::vector<std::string>& config);
|
static void lexArgs(const char* env, std::vector<std::string>& config);
|
||||||
void consumeToken(
|
static void consumeToken(
|
||||||
const std::vector<std::string>& config,
|
const std::vector<std::string>& config,
|
||||||
size_t i,
|
size_t i,
|
||||||
const char c);
|
const char c);
|
||||||
|
@ -22,13 +22,10 @@
|
|||||||
#include <c10/util/Exception.h>
|
#include <c10/util/Exception.h>
|
||||||
#include <cuda_runtime_api.h>
|
#include <cuda_runtime_api.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <bitset>
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iterator>
|
|
||||||
#include <map>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
|
@ -1,17 +1,22 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <c10/core/Allocator.h>
|
#include <c10/core/Allocator.h>
|
||||||
#include <c10/core/StorageImpl.h>
|
|
||||||
#include <c10/cuda/CUDAGraphsC10Utils.h>
|
#include <c10/cuda/CUDAGraphsC10Utils.h>
|
||||||
#include <c10/cuda/CUDAMacros.h>
|
#include <c10/cuda/CUDAMacros.h>
|
||||||
#include <c10/cuda/CUDAStream.h>
|
#include <c10/cuda/CUDAStream.h>
|
||||||
#include <c10/util/ApproximateClock.h>
|
#include <c10/util/ApproximateClock.h>
|
||||||
|
#include <c10/util/Exception.h>
|
||||||
#include <c10/util/Registry.h>
|
#include <c10/util/Registry.h>
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <mutex>
|
#include <atomic>
|
||||||
#include <set>
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <functional>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
namespace c10 {
|
namespace c10 {
|
||||||
|
|
||||||
@ -101,7 +106,7 @@ struct DeviceStats {
|
|||||||
int64_t max_split_size = 0;
|
int64_t max_split_size = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef std::shared_ptr<GatheredContext> (*CreateContextFn)(void);
|
typedef std::shared_ptr<GatheredContext> (*CreateContextFn)();
|
||||||
|
|
||||||
// Struct containing info of an allocation block (i.e. a fractional part of a
|
// Struct containing info of an allocation block (i.e. a fractional part of a
|
||||||
// cudaMalloc)..
|
// cudaMalloc)..
|
||||||
@ -123,7 +128,7 @@ struct SegmentInfo {
|
|||||||
int64_t requested_size = 0; // unrounded, actually requested size
|
int64_t requested_size = 0; // unrounded, actually requested size
|
||||||
int64_t allocated_size = 0;
|
int64_t allocated_size = 0;
|
||||||
int64_t active_size = 0;
|
int64_t active_size = 0;
|
||||||
cudaStream_t stream = 0;
|
cudaStream_t stream = nullptr;
|
||||||
bool is_large = false;
|
bool is_large = false;
|
||||||
bool is_expandable = false;
|
bool is_expandable = false;
|
||||||
MempoolId_t owner_private_pool_id = {0, 0};
|
MempoolId_t owner_private_pool_id = {0, 0};
|
||||||
@ -170,16 +175,16 @@ struct TraceEntry {
|
|||||||
addr_(addr),
|
addr_(addr),
|
||||||
context_(std::move(context)),
|
context_(std::move(context)),
|
||||||
stream_(stream),
|
stream_(stream),
|
||||||
size_(size) {
|
size_(static_cast<int64_t>(size)) {
|
||||||
time_.approx_t_ = time;
|
time_.approx_t_ = time;
|
||||||
}
|
}
|
||||||
Action action_;
|
Action action_;
|
||||||
int device_;
|
int device_;
|
||||||
int64_t addr_; // for OOM, this is the amount of free bytes reported by cuda
|
int64_t addr_; // for OOM, this is the amount of free bytes reported by cuda
|
||||||
std::shared_ptr<GatheredContext> context_;
|
std::shared_ptr<GatheredContext> context_;
|
||||||
cudaStream_t stream_;
|
cudaStream_t stream_{};
|
||||||
int64_t size_;
|
int64_t size_;
|
||||||
trace_time_ time_;
|
trace_time_ time_{};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SnapshotInfo {
|
struct SnapshotInfo {
|
||||||
@ -372,7 +377,7 @@ inline std::shared_ptr<AllocatorState> getCheckpointState(
|
|||||||
inline CheckpointDelta setCheckpointPoolState(
|
inline CheckpointDelta setCheckpointPoolState(
|
||||||
int device,
|
int device,
|
||||||
std::shared_ptr<AllocatorState> pps) {
|
std::shared_ptr<AllocatorState> pps) {
|
||||||
return get()->setCheckpointPoolState(device, pps);
|
return get()->setCheckpointPoolState(device, std::move(pps));
|
||||||
}
|
}
|
||||||
|
|
||||||
// CUDAGraph interactions
|
// CUDAGraph interactions
|
||||||
@ -409,11 +414,11 @@ inline bool checkPoolLiveAllocations(
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline void attachOutOfMemoryObserver(OutOfMemoryObserver observer) {
|
inline void attachOutOfMemoryObserver(OutOfMemoryObserver observer) {
|
||||||
return get()->attachOutOfMemoryObserver(observer);
|
return get()->attachOutOfMemoryObserver(std::move(observer));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void attachAllocatorTraceTracker(AllocatorTraceTracker tracker) {
|
inline void attachAllocatorTraceTracker(AllocatorTraceTracker tracker) {
|
||||||
return get()->attachAllocatorTraceTracker(tracker);
|
return get()->attachAllocatorTraceTracker(std::move(tracker));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void releasePool(int device, MempoolId_t mempool_id) {
|
inline void releasePool(int device, MempoolId_t mempool_id) {
|
||||||
@ -421,7 +426,7 @@ inline void releasePool(int device, MempoolId_t mempool_id) {
|
|||||||
}
|
}
|
||||||
// Not part of CUDA_ALLOCATOR_BACKEND_INTERFACE
|
// Not part of CUDA_ALLOCATOR_BACKEND_INTERFACE
|
||||||
inline std::shared_ptr<void> getIpcDevPtr(std::string handle) {
|
inline std::shared_ptr<void> getIpcDevPtr(std::string handle) {
|
||||||
return get()->getIpcDevPtr(handle);
|
return get()->getIpcDevPtr(std::move(handle));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string name() {
|
inline std::string name() {
|
||||||
|
@ -2,9 +2,11 @@
|
|||||||
|
|
||||||
#include <c10/cuda/CUDAMacros.h>
|
#include <c10/cuda/CUDAMacros.h>
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#ifdef USE_CUDA
|
#ifdef USE_CUDA
|
||||||
@ -22,19 +24,24 @@ namespace c10::cuda {
|
|||||||
/// Held in managed memory and accessed by both the CPU and the GPU.
|
/// Held in managed memory and accessed by both the CPU and the GPU.
|
||||||
struct DeviceAssertionData {
|
struct DeviceAssertionData {
|
||||||
/// Stringification of the assertion
|
/// Stringification of the assertion
|
||||||
char assertion_msg[C10_CUDA_DSA_MAX_STR_LEN];
|
// NOLINTNEXTLINE(*-c-arrays)
|
||||||
|
char assertion_msg[C10_CUDA_DSA_MAX_STR_LEN]{};
|
||||||
/// File the assertion was in
|
/// File the assertion was in
|
||||||
char filename[C10_CUDA_DSA_MAX_STR_LEN];
|
// NOLINTNEXTLINE(*-c-arrays)
|
||||||
|
char filename[C10_CUDA_DSA_MAX_STR_LEN]{};
|
||||||
/// Name of the function the assertion was in
|
/// Name of the function the assertion was in
|
||||||
char function_name[C10_CUDA_DSA_MAX_STR_LEN];
|
// NOLINTNEXTLINE(*-c-arrays)
|
||||||
|
char function_name[C10_CUDA_DSA_MAX_STR_LEN]{};
|
||||||
/// Line number the assertion was at
|
/// Line number the assertion was at
|
||||||
int line_number;
|
int line_number{};
|
||||||
/// Number uniquely identifying the kernel launch that triggered the assertion
|
/// Number uniquely identifying the kernel launch that triggered the assertion
|
||||||
uint32_t caller;
|
uint32_t caller{};
|
||||||
/// block_id of the thread that failed the assertion
|
/// block_id of the thread that failed the assertion
|
||||||
int32_t block_id[3];
|
// NOLINTNEXTLINE(*-c-arrays)
|
||||||
|
int32_t block_id[3]{};
|
||||||
/// thread_id of the thread that failed the assertion
|
/// thread_id of the thread that failed the assertion
|
||||||
int32_t thread_id[3];
|
// NOLINTNEXTLINE(*-c-arrays)
|
||||||
|
int32_t thread_id[3]{};
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Used to hold assertions generated by the device
|
/// Used to hold assertions generated by the device
|
||||||
@ -42,9 +49,10 @@ struct DeviceAssertionData {
|
|||||||
struct DeviceAssertionsData {
|
struct DeviceAssertionsData {
|
||||||
/// Total number of assertions found; a subset of these will be recorded
|
/// Total number of assertions found; a subset of these will be recorded
|
||||||
/// in `assertions`
|
/// in `assertions`
|
||||||
int32_t assertion_count;
|
int32_t assertion_count{};
|
||||||
/// An array of assertions that will be written to in a race-free manner
|
/// An array of assertions that will be written to in a race-free manner
|
||||||
DeviceAssertionData assertions[C10_CUDA_DSA_ASSERTION_COUNT];
|
// NOLINTNEXTLINE(*-c-arrays)
|
||||||
|
DeviceAssertionData assertions[C10_CUDA_DSA_ASSERTION_COUNT]{};
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Used to hold info about kernel launches so that we can run kernels
|
/// Used to hold info about kernel launches so that we can run kernels
|
||||||
|
@ -19,8 +19,8 @@ using MempoolId_t = std::pair<CaptureId_t, CaptureId_t>;
|
|||||||
// that controls the error-checking strictness of a capture.
|
// that controls the error-checking strictness of a capture.
|
||||||
#if !defined(USE_ROCM) || ROCM_VERSION >= 50300
|
#if !defined(USE_ROCM) || ROCM_VERSION >= 50300
|
||||||
struct C10_CUDA_API CUDAStreamCaptureModeGuard {
|
struct C10_CUDA_API CUDAStreamCaptureModeGuard {
|
||||||
CUDAStreamCaptureModeGuard(cudaStreamCaptureMode desired) {
|
CUDAStreamCaptureModeGuard(cudaStreamCaptureMode desired)
|
||||||
strictness_ = desired;
|
: strictness_(desired) {
|
||||||
C10_CUDA_CHECK(cudaThreadExchangeStreamCaptureMode(&strictness_));
|
C10_CUDA_CHECK(cudaThreadExchangeStreamCaptureMode(&strictness_));
|
||||||
}
|
}
|
||||||
~CUDAStreamCaptureModeGuard() {
|
~CUDAStreamCaptureModeGuard() {
|
||||||
@ -79,7 +79,7 @@ inline std::ostream& operator<<(std::ostream& os, CaptureStatus status) {
|
|||||||
// Use this version where you're sure a CUDA context exists already.
|
// Use this version where you're sure a CUDA context exists already.
|
||||||
inline CaptureStatus currentStreamCaptureStatusMayInitCtx() {
|
inline CaptureStatus currentStreamCaptureStatusMayInitCtx() {
|
||||||
#if !defined(USE_ROCM) || ROCM_VERSION >= 50300
|
#if !defined(USE_ROCM) || ROCM_VERSION >= 50300
|
||||||
cudaStreamCaptureStatus is_capturing;
|
cudaStreamCaptureStatus is_capturing{cudaStreamCaptureStatusNone};
|
||||||
C10_CUDA_CHECK(
|
C10_CUDA_CHECK(
|
||||||
cudaStreamIsCapturing(c10::cuda::getCurrentCUDAStream(), &is_capturing));
|
cudaStreamIsCapturing(c10::cuda::getCurrentCUDAStream(), &is_capturing));
|
||||||
return CaptureStatus(is_capturing);
|
return CaptureStatus(is_capturing);
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <c10/core/DeviceGuard.h>
|
|
||||||
#include <c10/core/impl/DeviceGuardImplInterface.h>
|
#include <c10/core/impl/DeviceGuardImplInterface.h>
|
||||||
#include <c10/core/impl/GPUTrace.h>
|
#include <c10/core/impl/GPUTrace.h>
|
||||||
#include <c10/macros/Macros.h>
|
#include <c10/macros/Macros.h>
|
||||||
@ -11,7 +10,13 @@
|
|||||||
#include <c10/cuda/CUDAFunctions.h>
|
#include <c10/cuda/CUDAFunctions.h>
|
||||||
#include <c10/cuda/CUDAStream.h>
|
#include <c10/cuda/CUDAStream.h>
|
||||||
|
|
||||||
|
#include <c10/core/Device.h>
|
||||||
|
#include <c10/core/DeviceType.h>
|
||||||
|
#include <c10/core/Stream.h>
|
||||||
|
#include <c10/core/impl/PyInterpreter.h>
|
||||||
|
#include <c10/util/Optional.h>
|
||||||
#include <cuda_runtime_api.h>
|
#include <cuda_runtime_api.h>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
namespace c10 {
|
namespace c10 {
|
||||||
namespace cuda {
|
namespace cuda {
|
||||||
@ -30,21 +35,21 @@ struct CUDAGuardImpl final : public c10::impl::DeviceGuardImplInterface {
|
|||||||
Device exchangeDevice(Device d) const override {
|
Device exchangeDevice(Device d) const override {
|
||||||
TORCH_INTERNAL_ASSERT(d.is_cuda());
|
TORCH_INTERNAL_ASSERT(d.is_cuda());
|
||||||
int old_device_index = c10::cuda::ExchangeDevice(d.index());
|
int old_device_index = c10::cuda::ExchangeDevice(d.index());
|
||||||
return Device(DeviceType::CUDA, old_device_index);
|
return Device(DeviceType::CUDA, static_cast<DeviceIndex>(old_device_index));
|
||||||
}
|
}
|
||||||
Device getDevice() const override {
|
Device getDevice() const override {
|
||||||
int device;
|
int device = 0;
|
||||||
C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
|
C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
|
||||||
return Device(DeviceType::CUDA, device);
|
return Device(DeviceType::CUDA, static_cast<DeviceIndex>(device));
|
||||||
}
|
}
|
||||||
c10::optional<Device> uncheckedGetDevice() const noexcept {
|
c10::optional<Device> uncheckedGetDevice() const noexcept {
|
||||||
int device;
|
int device = 0;
|
||||||
const auto err = C10_CUDA_ERROR_HANDLED(c10::cuda::GetDevice(&device));
|
const auto err = C10_CUDA_ERROR_HANDLED(c10::cuda::GetDevice(&device));
|
||||||
C10_CUDA_CHECK_WARN(err);
|
C10_CUDA_CHECK_WARN(err);
|
||||||
if (err != cudaSuccess) {
|
if (err != cudaSuccess) {
|
||||||
return c10::nullopt;
|
return c10::nullopt;
|
||||||
}
|
}
|
||||||
return Device(DeviceType::CUDA, device);
|
return Device(DeviceType::CUDA, static_cast<DeviceIndex>(device));
|
||||||
}
|
}
|
||||||
void setDevice(Device d) const override {
|
void setDevice(Device d) const override {
|
||||||
TORCH_INTERNAL_ASSERT(d.is_cuda());
|
TORCH_INTERNAL_ASSERT(d.is_cuda());
|
||||||
@ -104,7 +109,7 @@ struct CUDAGuardImpl final : public c10::impl::DeviceGuardImplInterface {
|
|||||||
if (!event)
|
if (!event)
|
||||||
return;
|
return;
|
||||||
auto cuda_event = static_cast<cudaEvent_t>(event);
|
auto cuda_event = static_cast<cudaEvent_t>(event);
|
||||||
int orig_device;
|
int orig_device = 0;
|
||||||
C10_CUDA_CHECK_WARN(c10::cuda::GetDevice(&orig_device));
|
C10_CUDA_CHECK_WARN(c10::cuda::GetDevice(&orig_device));
|
||||||
C10_CUDA_CHECK_WARN(c10::cuda::SetDevice(device_index));
|
C10_CUDA_CHECK_WARN(c10::cuda::SetDevice(device_index));
|
||||||
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
|
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
|
#include <c10/macros/Export.h>
|
||||||
#include <c10/util/SmallVector.h>
|
#include <c10/util/SmallVector.h>
|
||||||
#include <c10/util/flat_hash_map.h>
|
#include <c10/util/flat_hash_map.h>
|
||||||
|
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <c10/macros/Export.h>
|
||||||
#include <c10/util/flat_hash_map.h>
|
#include <c10/util/flat_hash_map.h>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user