[c10] Use nested namespace in c10/cuda (#116464)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116464
Approved by: https://github.com/Skylion007
Nikita Shulga
2023-12-27 09:52:40 -08:00
committed by PyTorch MergeBot
parent 93b86bf531
commit 53e32d12c4
15 changed files with 36 additions and 75 deletions
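
Background (an illustrative sketch, not part of the commit): C++17 nested namespace definitions let a whole namespace chain be opened in one declaration, so a single closing brace replaces one brace per level — which is exactly the mechanical rewrite this commit applies across c10/cuda. Both forms below declare members of the same c10::cuda namespace; the stub functions are hypothetical placeholders, not PyTorch APIs.

// Pre-C++17: one declaration and one closing brace per namespace level.
namespace c10 {
namespace cuda {
inline int device_count_stub() { return 0; } // hypothetical stub
} // namespace cuda
} // namespace c10

// C++17 nested namespace definition: same effect, less boilerplate.
namespace c10::cuda {
inline int stream_count_stub() { return 0; } // hypothetical stub
} // namespace c10::cuda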

View File

@@ -4,8 +4,7 @@
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
 #endif
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 #ifdef THRUST_DEVICE_LOWER_BOUND_WORKS
 template <typename Iter, typename Scalar>
 __forceinline__ __device__ Iter
@@ -29,5 +28,4 @@ __device__ Iter lower_bound(Iter start, Iter end, Scalar value) {
   return end;
 }
 #endif // THRUST_DEVICE_LOWER_BOUND_WORKS
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda

View File

@@ -10,9 +10,7 @@
 #include <atomic>
 #include <vector>
-namespace c10 {
-namespace cuda {
-namespace CUDACachingAllocator {
+namespace c10::cuda::CUDACachingAllocator {
 // Environment config parser
 class C10_CUDA_API CUDAAllocatorConfig {
@@ -111,6 +109,4 @@ class C10_CUDA_API CUDAAllocatorConfig {
 // General caching allocator utilities
 C10_CUDA_API void setAllocatorSettings(const std::string& env);
-} // namespace CUDACachingAllocator
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda::CUDACachingAllocator

View File

@@ -26,9 +26,8 @@ class C10_CUDA_API FreeMemoryCallback {
 C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback);
 #define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \
   C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__);
-namespace cuda {
+} // namespace c10
 //
 // TODO: Turn this into an honest to goodness class. I briefly attempted to do
 // this, but it was a bit irritating to figure out how to also correctly
 // apply pimpl pattern so I didn't have to leak any internal implementation
@@ -42,7 +41,7 @@ namespace cuda {
 // not counted as a word boundary, so you would otherwise have to list each
 // of these functions.
-namespace CUDACachingAllocator {
+namespace c10::cuda::CUDACachingAllocator {
 extern const size_t kLargeBuffer;
@@ -445,6 +444,4 @@ inline void enablePeerAccess(int dev, int dev_to_access) {
   return get()->enablePeerAccess(dev, dev_to_access);
 }
-} // namespace CUDACachingAllocator
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda::CUDACachingAllocator

View File

@@ -3,8 +3,7 @@
 #include <c10/cuda/CUDAException.h>
 #include <c10/macros/Macros.h>
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 #ifdef TORCH_USE_CUDA_DSA
 // Copy string from `src` to `dst`
@@ -94,5 +93,4 @@ static __device__ void dsa_add_new_assertion_failure(
 #define CUDA_KERNEL_ASSERT2(condition) assert(condition)
 #endif
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda

View File

@@ -16,8 +16,7 @@
 constexpr int C10_CUDA_DSA_ASSERTION_COUNT = 10;
 constexpr int C10_CUDA_DSA_MAX_STR_LEN = 512;
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 /// Holds information about any device-side assertions that fail.
 /// Held in managed memory and access by both the CPU and the GPU.
@@ -143,8 +142,7 @@ class C10_CUDA_API CUDAKernelLaunchRegistry {
 std::string c10_retrieve_device_side_assertion_info();
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda
 // Each kernel launched with TORCH_DSA_KERNEL_LAUNCH
 // requires the same input arguments. We introduce the following macro to

View File

@@ -86,8 +86,7 @@ class C10_CUDA_API CUDAError : public c10::Error {
     C10_CUDA_KERNEL_LAUNCH_CHECK(); \
   } while (0)
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 /// In the event of a CUDA failure, formats a nice error message about that
 /// failure and also checks for device-side assertion failures
@@ -98,5 +97,4 @@ C10_CUDA_API void c10_cuda_check_implementation(
     const int line_number,
     const bool include_device_assertions);
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda

View File

@@ -12,8 +12,7 @@
 #include <c10/cuda/CUDAException.h>
 #include <c10/cuda/CUDAMacros.h>
 #include <cuda_runtime_api.h>
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 // NB: In the past, we were inconsistent about whether or not this reported
 // an error if there were driver problems are not. Based on experience
@@ -114,5 +113,4 @@ C10_CUDA_API void __inline__ stream_synchronize(cudaStream_t stream) {
 C10_CUDA_API bool hasPrimaryContext(DeviceIndex device_index);
 C10_CUDA_API c10::optional<DeviceIndex> getDeviceIndexWithPrimaryContext();
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda

View File

@@ -7,8 +7,7 @@
 // CUDA Graphs utils used by c10 and aten.
 // aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only.
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 using CaptureId_t = unsigned long long;
@@ -89,5 +88,4 @@ inline CaptureStatus currentStreamCaptureStatusMayInitCtx() {
 #endif
 }
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda

View File

@@ -8,8 +8,7 @@
 #include <cstddef>
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 // This code is kind of boilerplatey. See Note [Whither the DeviceGuard
 // boilerplate]
@@ -301,5 +300,4 @@ struct CUDAMultiStreamGuard {
   }
 };
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda

View File

@@ -18,9 +18,7 @@
 #endif /* __CUDACC_RTC__ */
 #endif /* __HIPCC__ */
-namespace c10 {
-namespace cuda {
-namespace compat {
+namespace c10::cuda::compat {
 __MATH_FUNCTIONS_DECL__ float abs(float x) {
   return ::fabsf(x);
@@ -149,8 +147,6 @@ __MATH_FUNCTIONS_DECL__ double normcdf(double x) {
   return ::normcdf(x);
 }
-} // namespace compat
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda::compat
 #endif

View File

@@ -6,9 +6,7 @@
 #include <mutex>
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 C10_CUDA_API const char* get_cuda_check_suffix() noexcept;
 C10_CUDA_API std::mutex* getFreeMutex();
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda

View File

@@ -52,8 +52,7 @@
  * a kernel on the same stream from two different threads.
  */
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 static constexpr int max_compile_time_stream_priorities = 4;
@@ -260,8 +259,7 @@ C10_API void setCurrentCUDAStream(CUDAStream stream);
 C10_API std::ostream& operator<<(std::ostream& stream, const CUDAStream& s);
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda
 namespace std {
 template <>

View File

@@ -35,8 +35,7 @@
   _(nvmlDeviceGetNvLinkRemotePciInfo_v2) \
   _(nvmlDeviceGetComputeRunningProcesses)
-namespace c10 {
-namespace cuda {
+namespace c10::cuda {
 struct DriverAPI {
 #define CREATE_MEMBER(name) decltype(&name) name##_;
@@ -47,5 +46,4 @@ struct DriverAPI {
   static void* get_nvml_handle();
 };
-} // namespace cuda
-} // namespace c10
+} // namespace c10::cuda

View File

@@ -152,13 +152,9 @@
 // Simply define the namespace, in case a dependent library want to refer to
 // the c10 namespace but not any nontrivial files.
-namespace c10 {} // namespace c10
-namespace c10 {
-namespace cuda {}
-} // namespace c10
-namespace c10 {
-namespace hip {}
-} // namespace c10
+namespace c10 {}
+namespace c10::cuda {}
+namespace c10::hip {}
 // Since C10 is the core library for caffe2 (and aten), we will simply reroute
 // all abstractions defined in c10 to be available in caffe2 as well.
@@ -170,11 +166,9 @@ using namespace c10;
 namespace at {
 using namespace c10;
 }
-namespace at {
-namespace cuda {
+namespace at::cuda {
 using namespace c10::cuda;
-}
-} // namespace at
+} // namespace at::cuda
 // WARNING!!! THIS IS A GIANT HACK!!!
 // This line means you cannot simultaneously include c10/hip
@@ -184,11 +178,9 @@ using namespace c10::cuda;
 // from at::cuda. This namespace makes that happen. When
 // HIPIFY is no longer out-of-place, we can switch the cuda
 // here to hip and everyone is happy.
-namespace at {
-namespace cuda {
+namespace at::cuda {
 using namespace c10::hip;
-}
-} // namespace at
+} // namespace at::cuda
 // C10_LIKELY/C10_UNLIKELY
 //
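
Aside (an illustrative sketch, not from the commit; the function name is made up): the `using namespace` re-export in the hunks above works because qualified lookup into at::cuda also searches the namespaces nominated by its using-directives, so callers keep writing at::cuda::... while the definitions live in c10::cuda.

#include <iostream>

namespace c10::cuda {
inline void where_am_i() { std::cout << "defined in c10::cuda\n"; } // hypothetical
} // namespace c10::cuda

namespace at::cuda {
using namespace c10::cuda; // re-export, in the same spirit as Macros.h
} // namespace at::cuda

int main() {
  at::cuda::where_am_i(); // resolves to c10::cuda::where_am_i via the using-directive
}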

View File

@@ -37,7 +37,7 @@
 namespace c10 {
-// in c++17 std::result_of has been superceded by std::invoke_result. Since
+// in c++17 std::result_of has been superseded by std::invoke_result. Since
 // c++20, std::result_of is removed.
 template <typename F, typename... args>
 #if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L
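
For reference (a minimal sketch of the trait the comment above describes; add_one is a made-up example function): std::invoke_result takes the callable type and argument types as ordinary template parameters, replacing std::result_of's F(Args...) function-type encoding, which was deprecated in C++17 and removed in C++20.

#include <type_traits>

int add_one(int x) { return x + 1; }

// Pre-C++17 (deprecated in C++17, removed in C++20):
//   using R = std::result_of<decltype(&add_one)(int)>::type;
// C++17 replacement:
using R = std::invoke_result_t<decltype(&add_one), int>;
static_assert(std::is_same_v<R, int>, "add_one returns int");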