mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
[c10] Use nested namespace in c10/cuda (#116464)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/116464 Approved by: https://github.com/Skylion007
This commit is contained in:
committed by
PyTorch MergeBot
parent
93b86bf531
commit
53e32d12c4
@ -4,8 +4,7 @@
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/functional.h>
|
||||
#endif
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
#ifdef THRUST_DEVICE_LOWER_BOUND_WORKS
|
||||
template <typename Iter, typename Scalar>
|
||||
__forceinline__ __device__ Iter
|
||||
@ -29,5 +28,4 @@ __device__ Iter lower_bound(Iter start, Iter end, Scalar value) {
|
||||
return end;
|
||||
}
|
||||
#endif // THRUST_DEVICE_LOWER_BOUND_WORKS
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
@ -10,9 +10,7 @@
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace CUDACachingAllocator {
|
||||
namespace c10::cuda::CUDACachingAllocator {
|
||||
|
||||
// Environment config parser
|
||||
class C10_CUDA_API CUDAAllocatorConfig {
|
||||
@ -111,6 +109,4 @@ class C10_CUDA_API CUDAAllocatorConfig {
|
||||
// General caching allocator utilities
|
||||
C10_CUDA_API void setAllocatorSettings(const std::string& env);
|
||||
|
||||
} // namespace CUDACachingAllocator
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda::CUDACachingAllocator
|
||||
|
@ -26,9 +26,8 @@ class C10_CUDA_API FreeMemoryCallback {
|
||||
C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback);
|
||||
#define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \
|
||||
C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__);
|
||||
|
||||
namespace cuda {
|
||||
|
||||
} // namespace c10
|
||||
//
|
||||
// TODO: Turn this into an honest to goodness class. I briefly attempted to do
|
||||
// this, but it was a bit irritating to figure out how to also correctly
|
||||
// apply pimpl pattern so I didn't have to leak any internal implementation
|
||||
@ -42,7 +41,7 @@ namespace cuda {
|
||||
// not counted as a word boundary, so you would otherwise have to list each
|
||||
// of these functions.
|
||||
|
||||
namespace CUDACachingAllocator {
|
||||
namespace c10::cuda::CUDACachingAllocator {
|
||||
|
||||
extern const size_t kLargeBuffer;
|
||||
|
||||
@ -445,6 +444,4 @@ inline void enablePeerAccess(int dev, int dev_to_access) {
|
||||
return get()->enablePeerAccess(dev, dev_to_access);
|
||||
}
|
||||
|
||||
} // namespace CUDACachingAllocator
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda::CUDACachingAllocator
|
||||
|
@ -3,8 +3,7 @@
|
||||
#include <c10/cuda/CUDAException.h>
|
||||
#include <c10/macros/Macros.h>
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
|
||||
#ifdef TORCH_USE_CUDA_DSA
|
||||
// Copy string from `src` to `dst`
|
||||
@ -94,5 +93,4 @@ static __device__ void dsa_add_new_assertion_failure(
|
||||
#define CUDA_KERNEL_ASSERT2(condition) assert(condition)
|
||||
#endif
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
@ -16,8 +16,7 @@
|
||||
constexpr int C10_CUDA_DSA_ASSERTION_COUNT = 10;
|
||||
constexpr int C10_CUDA_DSA_MAX_STR_LEN = 512;
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
|
||||
/// Holds information about any device-side assertions that fail.
|
||||
/// Held in managed memory and accessed by both the CPU and the GPU.
|
||||
@ -143,8 +142,7 @@ class C10_CUDA_API CUDAKernelLaunchRegistry {
|
||||
|
||||
std::string c10_retrieve_device_side_assertion_info();
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
||||
// Each kernel launched with TORCH_DSA_KERNEL_LAUNCH
|
||||
// requires the same input arguments. We introduce the following macro to
|
||||
|
@ -86,8 +86,7 @@ class C10_CUDA_API CUDAError : public c10::Error {
|
||||
C10_CUDA_KERNEL_LAUNCH_CHECK(); \
|
||||
} while (0)
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
|
||||
/// In the event of a CUDA failure, formats a nice error message about that
|
||||
/// failure and also checks for device-side assertion failures
|
||||
@ -98,5 +97,4 @@ C10_CUDA_API void c10_cuda_check_implementation(
|
||||
const int line_number,
|
||||
const bool include_device_assertions);
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
@ -12,8 +12,7 @@
|
||||
#include <c10/cuda/CUDAException.h>
|
||||
#include <c10/cuda/CUDAMacros.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
|
||||
// NB: In the past, we were inconsistent about whether or not this reported
|
||||
// an error if there were driver problems or not. Based on experience
|
||||
@ -114,5 +113,4 @@ C10_CUDA_API void __inline__ stream_synchronize(cudaStream_t stream) {
|
||||
C10_CUDA_API bool hasPrimaryContext(DeviceIndex device_index);
|
||||
C10_CUDA_API c10::optional<DeviceIndex> getDeviceIndexWithPrimaryContext();
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
@ -7,8 +7,7 @@
|
||||
// CUDA Graphs utils used by c10 and aten.
|
||||
// aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only.
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
|
||||
using CaptureId_t = unsigned long long;
|
||||
|
||||
@ -89,5 +88,4 @@ inline CaptureStatus currentStreamCaptureStatusMayInitCtx() {
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
@ -8,8 +8,7 @@
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
|
||||
// This code is kind of boilerplatey. See Note [Whither the DeviceGuard
|
||||
// boilerplate]
|
||||
@ -301,5 +300,4 @@ struct CUDAMultiStreamGuard {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
@ -18,9 +18,7 @@
|
||||
#endif /* __CUDACC_RTC__ */
|
||||
#endif /* __HIPCC__ */
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace compat {
|
||||
namespace c10::cuda::compat {
|
||||
|
||||
__MATH_FUNCTIONS_DECL__ float abs(float x) {
|
||||
return ::fabsf(x);
|
||||
@ -149,8 +147,6 @@ __MATH_FUNCTIONS_DECL__ double normcdf(double x) {
|
||||
return ::normcdf(x);
|
||||
}
|
||||
|
||||
} // namespace compat
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda::compat
|
||||
|
||||
#endif
|
||||
|
@ -6,9 +6,7 @@
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
C10_CUDA_API const char* get_cuda_check_suffix() noexcept;
|
||||
C10_CUDA_API std::mutex* getFreeMutex();
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
@ -52,8 +52,7 @@
|
||||
* a kernel on the same stream from two different threads.
|
||||
*/
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
|
||||
static constexpr int max_compile_time_stream_priorities = 4;
|
||||
|
||||
@ -260,8 +259,7 @@ C10_API void setCurrentCUDAStream(CUDAStream stream);
|
||||
|
||||
C10_API std::ostream& operator<<(std::ostream& stream, const CUDAStream& s);
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
|
@ -35,8 +35,7 @@
|
||||
_(nvmlDeviceGetNvLinkRemotePciInfo_v2) \
|
||||
_(nvmlDeviceGetComputeRunningProcesses)
|
||||
|
||||
namespace c10 {
|
||||
namespace cuda {
|
||||
namespace c10::cuda {
|
||||
|
||||
struct DriverAPI {
|
||||
#define CREATE_MEMBER(name) decltype(&name) name##_;
|
||||
@ -47,5 +46,4 @@ struct DriverAPI {
|
||||
static void* get_nvml_handle();
|
||||
};
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace c10
|
||||
} // namespace c10::cuda
|
||||
|
@ -152,13 +152,9 @@
|
||||
|
||||
// Simply define the namespace, in case a dependent library wants to refer to
|
||||
// the c10 namespace but not any nontrivial files.
|
||||
namespace c10 {} // namespace c10
|
||||
namespace c10 {
|
||||
namespace cuda {}
|
||||
} // namespace c10
|
||||
namespace c10 {
|
||||
namespace hip {}
|
||||
} // namespace c10
|
||||
namespace c10 {}
|
||||
namespace c10::cuda {}
|
||||
namespace c10::hip {}
|
||||
|
||||
// Since C10 is the core library for caffe2 (and aten), we will simply reroute
|
||||
// all abstractions defined in c10 to be available in caffe2 as well.
|
||||
@ -170,11 +166,9 @@ using namespace c10;
|
||||
namespace at {
|
||||
using namespace c10;
|
||||
}
|
||||
namespace at {
|
||||
namespace cuda {
|
||||
namespace at::cuda {
|
||||
using namespace c10::cuda;
|
||||
}
|
||||
} // namespace at
|
||||
} // namespace at::cuda
|
||||
|
||||
// WARNING!!! THIS IS A GIANT HACK!!!
|
||||
// This line means you cannot simultaneously include c10/hip
|
||||
@ -184,11 +178,9 @@ using namespace c10::cuda;
|
||||
// from at::cuda. This namespace makes that happen. When
|
||||
// HIPIFY is no longer out-of-place, we can switch the cuda
|
||||
// here to hip and everyone is happy.
|
||||
namespace at {
|
||||
namespace cuda {
|
||||
namespace at::cuda {
|
||||
using namespace c10::hip;
|
||||
}
|
||||
} // namespace at
|
||||
} // namespace at::cuda
|
||||
|
||||
// C10_LIKELY/C10_UNLIKELY
|
||||
//
|
||||
|
@ -37,7 +37,7 @@
|
||||
|
||||
namespace c10 {
|
||||
|
||||
// in c++17 std::result_of has been superceded by std::invoke_result. Since
|
||||
// in c++17 std::result_of has been superseded by std::invoke_result. Since
|
||||
// c++20, std::result_of is removed.
|
||||
template <typename F, typename... args>
|
||||
#if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L
|
||||
|
Reference in New Issue
Block a user