[c10] Use nested namespace in c10/cuda (#116464)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116464
Approved by: https://github.com/Skylion007
This commit is contained in:
Nikita Shulga
2023-12-27 09:52:40 -08:00
committed by PyTorch MergeBot
parent 93b86bf531
commit 53e32d12c4
15 changed files with 36 additions and 75 deletions

View File

@@ -4,8 +4,7 @@
#include <thrust/execution_policy.h> #include <thrust/execution_policy.h>
#include <thrust/functional.h> #include <thrust/functional.h>
#endif #endif
namespace c10 { namespace c10::cuda {
namespace cuda {
#ifdef THRUST_DEVICE_LOWER_BOUND_WORKS #ifdef THRUST_DEVICE_LOWER_BOUND_WORKS
template <typename Iter, typename Scalar> template <typename Iter, typename Scalar>
__forceinline__ __device__ Iter __forceinline__ __device__ Iter
@@ -29,5 +28,4 @@ __device__ Iter lower_bound(Iter start, Iter end, Scalar value) {
return end; return end;
} }
#endif // THRUST_DEVICE_LOWER_BOUND_WORKS #endif // THRUST_DEVICE_LOWER_BOUND_WORKS
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -10,9 +10,7 @@
#include <atomic> #include <atomic>
#include <vector> #include <vector>
namespace c10 { namespace c10::cuda::CUDACachingAllocator {
namespace cuda {
namespace CUDACachingAllocator {
// Environment config parser // Environment config parser
class C10_CUDA_API CUDAAllocatorConfig { class C10_CUDA_API CUDAAllocatorConfig {
@@ -111,6 +109,4 @@ class C10_CUDA_API CUDAAllocatorConfig {
// General caching allocator utilities // General caching allocator utilities
C10_CUDA_API void setAllocatorSettings(const std::string& env); C10_CUDA_API void setAllocatorSettings(const std::string& env);
} // namespace CUDACachingAllocator } // namespace c10::cuda::CUDACachingAllocator
} // namespace cuda
} // namespace c10

View File

@@ -26,9 +26,8 @@ class C10_CUDA_API FreeMemoryCallback {
C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback); C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback);
#define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \ #define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \
C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__); C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__);
} // namespace c10
namespace cuda { //
// TODO: Turn this into an honest to goodness class. I briefly attempted to do // TODO: Turn this into an honest to goodness class. I briefly attempted to do
// this, but it was a bit irritating to figure out how to also correctly // this, but it was a bit irritating to figure out how to also correctly
// apply pimpl pattern so I didn't have to leak any internal implementation // apply pimpl pattern so I didn't have to leak any internal implementation
@@ -42,7 +41,7 @@ namespace cuda {
// not counted as a word boundary, so you would otherwise have to list each // not counted as a word boundary, so you would otherwise have to list each
// of these functions. // of these functions.
namespace CUDACachingAllocator { namespace c10::cuda::CUDACachingAllocator {
extern const size_t kLargeBuffer; extern const size_t kLargeBuffer;
@@ -445,6 +444,4 @@ inline void enablePeerAccess(int dev, int dev_to_access) {
return get()->enablePeerAccess(dev, dev_to_access); return get()->enablePeerAccess(dev, dev_to_access);
} }
} // namespace CUDACachingAllocator } // namespace c10::cuda::CUDACachingAllocator
} // namespace cuda
} // namespace c10

View File

@@ -3,8 +3,7 @@
#include <c10/cuda/CUDAException.h> #include <c10/cuda/CUDAException.h>
#include <c10/macros/Macros.h> #include <c10/macros/Macros.h>
namespace c10 { namespace c10::cuda {
namespace cuda {
#ifdef TORCH_USE_CUDA_DSA #ifdef TORCH_USE_CUDA_DSA
// Copy string from `src` to `dst` // Copy string from `src` to `dst`
@@ -94,5 +93,4 @@ static __device__ void dsa_add_new_assertion_failure(
#define CUDA_KERNEL_ASSERT2(condition) assert(condition) #define CUDA_KERNEL_ASSERT2(condition) assert(condition)
#endif #endif
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -16,8 +16,7 @@
constexpr int C10_CUDA_DSA_ASSERTION_COUNT = 10; constexpr int C10_CUDA_DSA_ASSERTION_COUNT = 10;
constexpr int C10_CUDA_DSA_MAX_STR_LEN = 512; constexpr int C10_CUDA_DSA_MAX_STR_LEN = 512;
namespace c10 { namespace c10::cuda {
namespace cuda {
/// Holds information about any device-side assertions that fail. /// Holds information about any device-side assertions that fail.
/// Held in managed memory and access by both the CPU and the GPU. /// Held in managed memory and access by both the CPU and the GPU.
@@ -143,8 +142,7 @@ class C10_CUDA_API CUDAKernelLaunchRegistry {
std::string c10_retrieve_device_side_assertion_info(); std::string c10_retrieve_device_side_assertion_info();
} // namespace cuda } // namespace c10::cuda
} // namespace c10
// Each kernel launched with TORCH_DSA_KERNEL_LAUNCH // Each kernel launched with TORCH_DSA_KERNEL_LAUNCH
// requires the same input arguments. We introduce the following macro to // requires the same input arguments. We introduce the following macro to

View File

@@ -86,8 +86,7 @@ class C10_CUDA_API CUDAError : public c10::Error {
C10_CUDA_KERNEL_LAUNCH_CHECK(); \ C10_CUDA_KERNEL_LAUNCH_CHECK(); \
} while (0) } while (0)
namespace c10 { namespace c10::cuda {
namespace cuda {
/// In the event of a CUDA failure, formats a nice error message about that /// In the event of a CUDA failure, formats a nice error message about that
/// failure and also checks for device-side assertion failures /// failure and also checks for device-side assertion failures
@@ -98,5 +97,4 @@ C10_CUDA_API void c10_cuda_check_implementation(
const int line_number, const int line_number,
const bool include_device_assertions); const bool include_device_assertions);
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -12,8 +12,7 @@
#include <c10/cuda/CUDAException.h> #include <c10/cuda/CUDAException.h>
#include <c10/cuda/CUDAMacros.h> #include <c10/cuda/CUDAMacros.h>
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
namespace c10 { namespace c10::cuda {
namespace cuda {
// NB: In the past, we were inconsistent about whether or not this reported // NB: In the past, we were inconsistent about whether or not this reported
// an error if there were driver problems are not. Based on experience // an error if there were driver problems are not. Based on experience
@@ -114,5 +113,4 @@ C10_CUDA_API void __inline__ stream_synchronize(cudaStream_t stream) {
C10_CUDA_API bool hasPrimaryContext(DeviceIndex device_index); C10_CUDA_API bool hasPrimaryContext(DeviceIndex device_index);
C10_CUDA_API c10::optional<DeviceIndex> getDeviceIndexWithPrimaryContext(); C10_CUDA_API c10::optional<DeviceIndex> getDeviceIndexWithPrimaryContext();
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -7,8 +7,7 @@
// CUDA Graphs utils used by c10 and aten. // CUDA Graphs utils used by c10 and aten.
// aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only. // aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only.
namespace c10 { namespace c10::cuda {
namespace cuda {
using CaptureId_t = unsigned long long; using CaptureId_t = unsigned long long;
@@ -89,5 +88,4 @@ inline CaptureStatus currentStreamCaptureStatusMayInitCtx() {
#endif #endif
} }
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -8,8 +8,7 @@
#include <cstddef> #include <cstddef>
namespace c10 { namespace c10::cuda {
namespace cuda {
// This code is kind of boilerplatey. See Note [Whither the DeviceGuard // This code is kind of boilerplatey. See Note [Whither the DeviceGuard
// boilerplate] // boilerplate]
@@ -301,5 +300,4 @@ struct CUDAMultiStreamGuard {
} }
}; };
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -18,9 +18,7 @@
#endif /* __CUDACC_RTC__ */ #endif /* __CUDACC_RTC__ */
#endif /* __HIPCC__ */ #endif /* __HIPCC__ */
namespace c10 { namespace c10::cuda::compat {
namespace cuda {
namespace compat {
__MATH_FUNCTIONS_DECL__ float abs(float x) { __MATH_FUNCTIONS_DECL__ float abs(float x) {
return ::fabsf(x); return ::fabsf(x);
@@ -149,8 +147,6 @@ __MATH_FUNCTIONS_DECL__ double normcdf(double x) {
return ::normcdf(x); return ::normcdf(x);
} }
} // namespace compat } // namespace c10::cuda::compat
} // namespace cuda
} // namespace c10
#endif #endif

View File

@@ -6,9 +6,7 @@
#include <mutex> #include <mutex>
namespace c10 { namespace c10::cuda {
namespace cuda {
C10_CUDA_API const char* get_cuda_check_suffix() noexcept; C10_CUDA_API const char* get_cuda_check_suffix() noexcept;
C10_CUDA_API std::mutex* getFreeMutex(); C10_CUDA_API std::mutex* getFreeMutex();
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -52,8 +52,7 @@
* a kernel on the same stream from two different threads. * a kernel on the same stream from two different threads.
*/ */
namespace c10 { namespace c10::cuda {
namespace cuda {
static constexpr int max_compile_time_stream_priorities = 4; static constexpr int max_compile_time_stream_priorities = 4;
@@ -260,8 +259,7 @@ C10_API void setCurrentCUDAStream(CUDAStream stream);
C10_API std::ostream& operator<<(std::ostream& stream, const CUDAStream& s); C10_API std::ostream& operator<<(std::ostream& stream, const CUDAStream& s);
} // namespace cuda } // namespace c10::cuda
} // namespace c10
namespace std { namespace std {
template <> template <>

View File

@@ -35,8 +35,7 @@
_(nvmlDeviceGetNvLinkRemotePciInfo_v2) \ _(nvmlDeviceGetNvLinkRemotePciInfo_v2) \
_(nvmlDeviceGetComputeRunningProcesses) _(nvmlDeviceGetComputeRunningProcesses)
namespace c10 { namespace c10::cuda {
namespace cuda {
struct DriverAPI { struct DriverAPI {
#define CREATE_MEMBER(name) decltype(&name) name##_; #define CREATE_MEMBER(name) decltype(&name) name##_;
@@ -47,5 +46,4 @@ struct DriverAPI {
static void* get_nvml_handle(); static void* get_nvml_handle();
}; };
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -152,13 +152,9 @@
// Simply define the namespace, in case a dependent library want to refer to // Simply define the namespace, in case a dependent library want to refer to
// the c10 namespace but not any nontrivial files. // the c10 namespace but not any nontrivial files.
namespace c10 {} // namespace c10 namespace c10 {}
namespace c10 { namespace c10::cuda {}
namespace cuda {} namespace c10::hip {}
} // namespace c10
namespace c10 {
namespace hip {}
} // namespace c10
// Since C10 is the core library for caffe2 (and aten), we will simply reroute // Since C10 is the core library for caffe2 (and aten), we will simply reroute
// all abstractions defined in c10 to be available in caffe2 as well. // all abstractions defined in c10 to be available in caffe2 as well.
@@ -170,11 +166,9 @@ using namespace c10::cuda;
namespace at { namespace at {
using namespace c10; using namespace c10;
} }
namespace at { namespace at::cuda {
namespace cuda {
using namespace c10::cuda; using namespace c10::cuda;
} } // namespace at::cuda
} // namespace at
// WARNING!!! THIS IS A GIANT HACK!!! // WARNING!!! THIS IS A GIANT HACK!!!
// This line means you cannot simultaneously include c10/hip // This line means you cannot simultaneously include c10/hip
@@ -184,11 +178,9 @@ using namespace c10::cuda;
// from at::cuda. This namespace makes that happen. When // from at::cuda. This namespace makes that happen. When
// HIPIFY is no longer out-of-place, we can switch the cuda // HIPIFY is no longer out-of-place, we can switch the cuda
// here to hip and everyone is happy. // here to hip and everyone is happy.
namespace at { namespace at::cuda {
namespace cuda {
using namespace c10::hip; using namespace c10::hip;
} } // namespace at::cuda
} // namespace at
// C10_LIKELY/C10_UNLIKELY // C10_LIKELY/C10_UNLIKELY
// //

View File

@@ -37,7 +37,7 @@
namespace c10 { namespace c10 {
// in c++17 std::result_of has been superceded by std::invoke_result. Since // in c++17 std::result_of has been superseded by std::invoke_result. Since
// c++20, std::result_of is removed. // c++20, std::result_of is removed.
template <typename F, typename... args> template <typename F, typename... args>
#if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L #if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L