[c10] Use nested namespace in c10/cuda (#116464)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116464
Approved by: https://github.com/Skylion007
This commit is contained in:
Nikita Shulga
2023-12-27 09:52:40 -08:00
committed by PyTorch MergeBot
parent 93b86bf531
commit 53e32d12c4
15 changed files with 36 additions and 75 deletions

View File

@@ -4,8 +4,7 @@
#include <thrust/execution_policy.h> #include <thrust/execution_policy.h>
#include <thrust/functional.h> #include <thrust/functional.h>
#endif #endif
namespace c10 { namespace c10::cuda {
namespace cuda {
#ifdef THRUST_DEVICE_LOWER_BOUND_WORKS #ifdef THRUST_DEVICE_LOWER_BOUND_WORKS
template <typename Iter, typename Scalar> template <typename Iter, typename Scalar>
__forceinline__ __device__ Iter __forceinline__ __device__ Iter
@@ -29,5 +28,4 @@ __device__ Iter lower_bound(Iter start, Iter end, Scalar value) {
return end; return end;
} }
#endif // THRUST_DEVICE_LOWER_BOUND_WORKS #endif // THRUST_DEVICE_LOWER_BOUND_WORKS
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -10,9 +10,7 @@
#include <atomic> #include <atomic>
#include <vector> #include <vector>
namespace c10 { namespace c10::cuda::CUDACachingAllocator {
namespace cuda {
namespace CUDACachingAllocator {
// Environment config parser // Environment config parser
class C10_CUDA_API CUDAAllocatorConfig { class C10_CUDA_API CUDAAllocatorConfig {
@@ -111,6 +109,4 @@ class C10_CUDA_API CUDAAllocatorConfig {
// General caching allocator utilities // General caching allocator utilities
C10_CUDA_API void setAllocatorSettings(const std::string& env); C10_CUDA_API void setAllocatorSettings(const std::string& env);
} // namespace CUDACachingAllocator } // namespace c10::cuda::CUDACachingAllocator
} // namespace cuda
} // namespace c10

View File

@@ -26,9 +26,8 @@ class C10_CUDA_API FreeMemoryCallback {
C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback); C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback);
#define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \ #define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \
C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__); C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__);
} // namespace c10
namespace cuda { //
// TODO: Turn this into an honest to goodness class. I briefly attempted to do // TODO: Turn this into an honest to goodness class. I briefly attempted to do
// this, but it was a bit irritating to figure out how to also correctly // this, but it was a bit irritating to figure out how to also correctly
// apply pimpl pattern so I didn't have to leak any internal implementation // apply pimpl pattern so I didn't have to leak any internal implementation
@@ -42,7 +41,7 @@ namespace cuda {
// not counted as a word boundary, so you would otherwise have to list each // not counted as a word boundary, so you would otherwise have to list each
// of these functions. // of these functions.
namespace CUDACachingAllocator { namespace c10::cuda::CUDACachingAllocator {
extern const size_t kLargeBuffer; extern const size_t kLargeBuffer;
@@ -445,6 +444,4 @@ inline void enablePeerAccess(int dev, int dev_to_access) {
return get()->enablePeerAccess(dev, dev_to_access); return get()->enablePeerAccess(dev, dev_to_access);
} }
} // namespace CUDACachingAllocator } // namespace c10::cuda::CUDACachingAllocator
} // namespace cuda
} // namespace c10

View File

@@ -3,8 +3,7 @@
#include <c10/cuda/CUDAException.h> #include <c10/cuda/CUDAException.h>
#include <c10/macros/Macros.h> #include <c10/macros/Macros.h>
namespace c10 { namespace c10::cuda {
namespace cuda {
#ifdef TORCH_USE_CUDA_DSA #ifdef TORCH_USE_CUDA_DSA
// Copy string from `src` to `dst` // Copy string from `src` to `dst`
@@ -94,5 +93,4 @@ static __device__ void dsa_add_new_assertion_failure(
#define CUDA_KERNEL_ASSERT2(condition) assert(condition) #define CUDA_KERNEL_ASSERT2(condition) assert(condition)
#endif #endif
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -16,8 +16,7 @@
constexpr int C10_CUDA_DSA_ASSERTION_COUNT = 10; constexpr int C10_CUDA_DSA_ASSERTION_COUNT = 10;
constexpr int C10_CUDA_DSA_MAX_STR_LEN = 512; constexpr int C10_CUDA_DSA_MAX_STR_LEN = 512;
namespace c10 { namespace c10::cuda {
namespace cuda {
/// Holds information about any device-side assertions that fail. /// Holds information about any device-side assertions that fail.
/// Held in managed memory and access by both the CPU and the GPU. /// Held in managed memory and access by both the CPU and the GPU.
@@ -143,8 +142,7 @@ class C10_CUDA_API CUDAKernelLaunchRegistry {
std::string c10_retrieve_device_side_assertion_info(); std::string c10_retrieve_device_side_assertion_info();
} // namespace cuda } // namespace c10::cuda
} // namespace c10
// Each kernel launched with TORCH_DSA_KERNEL_LAUNCH // Each kernel launched with TORCH_DSA_KERNEL_LAUNCH
// requires the same input arguments. We introduce the following macro to // requires the same input arguments. We introduce the following macro to

View File

@@ -86,8 +86,7 @@ class C10_CUDA_API CUDAError : public c10::Error {
C10_CUDA_KERNEL_LAUNCH_CHECK(); \ C10_CUDA_KERNEL_LAUNCH_CHECK(); \
} while (0) } while (0)
namespace c10 { namespace c10::cuda {
namespace cuda {
/// In the event of a CUDA failure, formats a nice error message about that /// In the event of a CUDA failure, formats a nice error message about that
/// failure and also checks for device-side assertion failures /// failure and also checks for device-side assertion failures
@@ -98,5 +97,4 @@ C10_CUDA_API void c10_cuda_check_implementation(
const int line_number, const int line_number,
const bool include_device_assertions); const bool include_device_assertions);
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -12,8 +12,7 @@
#include <c10/cuda/CUDAException.h> #include <c10/cuda/CUDAException.h>
#include <c10/cuda/CUDAMacros.h> #include <c10/cuda/CUDAMacros.h>
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
namespace c10 { namespace c10::cuda {
namespace cuda {
// NB: In the past, we were inconsistent about whether or not this reported // NB: In the past, we were inconsistent about whether or not this reported
// an error if there were driver problems are not. Based on experience // an error if there were driver problems are not. Based on experience
@@ -114,5 +113,4 @@ C10_CUDA_API void __inline__ stream_synchronize(cudaStream_t stream) {
C10_CUDA_API bool hasPrimaryContext(DeviceIndex device_index); C10_CUDA_API bool hasPrimaryContext(DeviceIndex device_index);
C10_CUDA_API c10::optional<DeviceIndex> getDeviceIndexWithPrimaryContext(); C10_CUDA_API c10::optional<DeviceIndex> getDeviceIndexWithPrimaryContext();
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -7,8 +7,7 @@
// CUDA Graphs utils used by c10 and aten. // CUDA Graphs utils used by c10 and aten.
// aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only. // aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only.
namespace c10 { namespace c10::cuda {
namespace cuda {
using CaptureId_t = unsigned long long; using CaptureId_t = unsigned long long;
@@ -89,5 +88,4 @@ inline CaptureStatus currentStreamCaptureStatusMayInitCtx() {
#endif #endif
} }
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -8,8 +8,7 @@
#include <cstddef> #include <cstddef>
namespace c10 { namespace c10::cuda {
namespace cuda {
// This code is kind of boilerplatey. See Note [Whither the DeviceGuard // This code is kind of boilerplatey. See Note [Whither the DeviceGuard
// boilerplate] // boilerplate]
@@ -301,5 +300,4 @@ struct CUDAMultiStreamGuard {
} }
}; };
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -18,9 +18,7 @@
#endif /* __CUDACC_RTC__ */ #endif /* __CUDACC_RTC__ */
#endif /* __HIPCC__ */ #endif /* __HIPCC__ */
namespace c10 { namespace c10::cuda::compat {
namespace cuda {
namespace compat {
__MATH_FUNCTIONS_DECL__ float abs(float x) { __MATH_FUNCTIONS_DECL__ float abs(float x) {
return ::fabsf(x); return ::fabsf(x);
@@ -149,8 +147,6 @@ __MATH_FUNCTIONS_DECL__ double normcdf(double x) {
return ::normcdf(x); return ::normcdf(x);
} }
} // namespace compat } // namespace c10::cuda::compat
} // namespace cuda
} // namespace c10
#endif #endif

View File

@@ -6,9 +6,7 @@
#include <mutex> #include <mutex>
namespace c10 { namespace c10::cuda {
namespace cuda {
C10_CUDA_API const char* get_cuda_check_suffix() noexcept; C10_CUDA_API const char* get_cuda_check_suffix() noexcept;
C10_CUDA_API std::mutex* getFreeMutex(); C10_CUDA_API std::mutex* getFreeMutex();
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -52,8 +52,7 @@
* a kernel on the same stream from two different threads. * a kernel on the same stream from two different threads.
*/ */
namespace c10 { namespace c10::cuda {
namespace cuda {
static constexpr int max_compile_time_stream_priorities = 4; static constexpr int max_compile_time_stream_priorities = 4;
@@ -260,8 +259,7 @@ C10_API void setCurrentCUDAStream(CUDAStream stream);
C10_API std::ostream& operator<<(std::ostream& stream, const CUDAStream& s); C10_API std::ostream& operator<<(std::ostream& stream, const CUDAStream& s);
} // namespace cuda } // namespace c10::cuda
} // namespace c10
namespace std { namespace std {
template <> template <>

View File

@@ -35,8 +35,7 @@
_(nvmlDeviceGetNvLinkRemotePciInfo_v2) \ _(nvmlDeviceGetNvLinkRemotePciInfo_v2) \
_(nvmlDeviceGetComputeRunningProcesses) _(nvmlDeviceGetComputeRunningProcesses)
namespace c10 { namespace c10::cuda {
namespace cuda {
struct DriverAPI { struct DriverAPI {
#define CREATE_MEMBER(name) decltype(&name) name##_; #define CREATE_MEMBER(name) decltype(&name) name##_;
@@ -47,5 +46,4 @@ struct DriverAPI {
static void* get_nvml_handle(); static void* get_nvml_handle();
}; };
} // namespace cuda } // namespace c10::cuda
} // namespace c10

View File

@@ -152,13 +152,9 @@
// Simply define the namespace, in case a dependent library want to refer to // Simply define the namespace, in case a dependent library want to refer to
// the c10 namespace but not any nontrivial files. // the c10 namespace but not any nontrivial files.
namespace c10 {} // namespace c10 namespace c10 {}
namespace c10 { namespace c10::cuda {}
namespace cuda {} namespace c10::hip {}
} // namespace c10
namespace c10 {
namespace hip {}
} // namespace c10
// Since C10 is the core library for caffe2 (and aten), we will simply reroute // Since C10 is the core library for caffe2 (and aten), we will simply reroute
// all abstractions defined in c10 to be available in caffe2 as well. // all abstractions defined in c10 to be available in caffe2 as well.
@@ -170,11 +166,9 @@ using namespace c10::cuda;
namespace at { namespace at {
using namespace c10; using namespace c10;
} }
namespace at { namespace at::cuda {
namespace cuda {
using namespace c10::cuda; using namespace c10::cuda;
} } // namespace at::cuda
} // namespace at
// WARNING!!! THIS IS A GIANT HACK!!! // WARNING!!! THIS IS A GIANT HACK!!!
// This line means you cannot simultaneously include c10/hip // This line means you cannot simultaneously include c10/hip
@@ -184,11 +178,9 @@ using namespace c10::cuda;
// from at::cuda. This namespace makes that happen. When // from at::cuda. This namespace makes that happen. When
// HIPIFY is no longer out-of-place, we can switch the cuda // HIPIFY is no longer out-of-place, we can switch the cuda
// here to hip and everyone is happy. // here to hip and everyone is happy.
namespace at { namespace at::cuda {
namespace cuda {
using namespace c10::hip; using namespace c10::hip;
} } // namespace at::cuda
} // namespace at
// C10_LIKELY/C10_UNLIKELY // C10_LIKELY/C10_UNLIKELY
// //

View File

@@ -37,7 +37,7 @@
namespace c10 { namespace c10 {
// in c++17 std::result_of has been superceded by std::invoke_result. Since // in c++17 std::result_of has been superseded by std::invoke_result. Since
// c++20, std::result_of is removed. // c++20, std::result_of is removed.
template <typename F, typename... args> template <typename F, typename... args>
#if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L #if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L