mirror of https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[1/N] Remove inclusion of ATen/core/Array.h (#122064)
The functionality of Array.h largely overlaps with std::array, so it should be safe to use std::array instead.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/122064
Approved by: https://github.com/ezyang
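For context, here is a rough sketch of the kind of helper ATen/core/Array.h provides, next to its std::array replacement. This is a simplified illustration, not the exact PyTorch definition; the real class also carries C10_HOST_DEVICE qualifiers on its accessors so it can be used inside kernels.

    #include <array>
    #include <cstdint>

    // Simplified sketch of an Array.h-style helper (illustrative only):
    // a fixed-size aggregate whose backing C array is a public member
    // named "data", which is why old call sites below say "a.data[i]".
    template <typename T, int size_>
    struct ArraySketch {
      T data[size_];
      T& operator[](int i) { return data[i]; }
      const T& operator[](int i) const { return data[i]; }
    };

    // std::array offers the same fixed-size value semantics, is trivially
    // copyable (so it can still be captured by value into GPU lambdas),
    // and since C++17 its operator[] is constexpr.
    using UINT4 = std::array<uint32_t, 4>;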
@@ -11,6 +11,7 @@
 #include <cuda.h>
 #endif
 
+#include <array>
 #include <ATen/core/Array.h>
 #include <c10/macros/Macros.h>
 #include <cmath>
@@ -21,10 +22,10 @@ namespace at {
 // typedefs for holding vector data
 namespace detail {
 
-typedef at::detail::Array<uint32_t, 4> UINT4;
-typedef at::detail::Array<uint32_t, 2> UINT2;
-typedef at::detail::Array<double, 2> DOUBLE2;
-typedef at::detail::Array<float, 2> FLOAT2;
+typedef std::array<uint32_t, 4> UINT4;
+typedef std::array<uint32_t, 2> UINT2;
+typedef std::array<double, 2> DOUBLE2;
+typedef std::array<float, 2> FLOAT2;
 
 } // namespace detail
 
@@ -79,7 +80,7 @@ public:
                  uint64_t subsequence = 0) {
    key_[0] = static_cast<uint32_t>(seed);
    key_[1] = static_cast<uint32_t>(seed >> 32);
-   counter_ = detail::UINT4(0);
+   counter_ = detail::UINT4{};
    counter_[2] = static_cast<uint32_t>(subsequence);
    counter_[3] = static_cast<uint32_t>(subsequence >> 32);
    STATE = 0;
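The constructor change is forced by the swap: at::detail::Array has a scalar fill constructor, std::array does not, and value-initialization with {} produces the same all-zero counter. A minimal illustrative check:

    #include <array>
    #include <cassert>
    #include <cstdint>

    using UINT4 = std::array<uint32_t, 4>;

    int main() {
      // std::array has no scalar fill constructor, so UINT4(0) no longer
      // compiles; value-initialization with {} zeroes all four lanes,
      // matching the old at::detail::Array<uint32_t, 4>(0) behavior.
      UINT4 counter{};
      for (uint32_t v : counter) {
        assert(v == 0);
      }
      return 0;
    }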
@@ -296,7 +296,7 @@ void gpu_kernel_impl_nocast(TensorIteratorBase& iter, const func_t& f) {
   TORCH_INTERNAL_ASSERT(iter.noutputs() == 1);
   TORCH_INTERNAL_ASSERT(!needs_dynamic_casting<func_t>::check(iter));
 
-  at::detail::Array<char*, ntensors> data;
+  std::array<char*, ntensors> data;
   for (int i = 0; i < ntensors; i++) {
     data[i] = (char*)iter.data_ptr(i);
   }
@@ -313,7 +313,7 @@ void gpu_kernel_impl_nocast(TensorIteratorBase& iter, const func_t& f) {
   launch_legacy_kernel<128, unroll_factor>(numel, [=] GPU_LAMBDA(int idx) {
     auto offsets = offset_calc.get(idx);
     arg0_t* out = (arg0_t*)(data[0] + offsets[0]);
-    *out = invoke(f, &data.data[1], &offsets.data[1], 1);
+    *out = invoke(f, &data[1], &offsets[1], 1);
   });
 }
 
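The old &data.data[1] spelling reached through the public C-array member of at::detail::Array; with std::array, &data[1] (or data.data() + 1) points at the same element of the contiguous storage. A small illustration:

    #include <array>
    #include <cassert>

    int main() {
      std::array<char*, 3> data{};
      // std::array guarantees contiguous storage, so &data[1] is the
      // same pointer the old &data.data[1] spelling produced via the
      // public C-array member of at::detail::Array.
      assert(&data[1] == data.data() + 1);
      return 0;
    }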
@@ -330,7 +330,7 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
   TORCH_INTERNAL_ASSERT(iter.ninputs() == traits::arity);
   TORCH_INTERNAL_ASSERT(iter.noutputs() == 1);
 
-  at::detail::Array<char*, ntensors> data;
+  std::array<char*, ntensors> data;
   for (int i = 0; i < ntensors; i++) {
     data[i] = (char*)iter.data_ptr(i);
   }
@@ -341,16 +341,16 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
 
   if (contiguous) {
 #ifdef USE_ROCM
-    at::detail::Array<ScalarType, ntensors> dtypes;
+    std::array<ScalarType, ntensors> dtypes;
     auto inner_strides = iter.get_inner_strides();
-    at::detail::Array<int, ntensors> strides;
+    std::array<int, ntensors> strides;
     for (int i = 0; i < ntensors; i++) {
       dtypes[i] = iter.dtype(i);
       strides[i] = inner_strides[i];
     }
     launch_legacy_kernel<512, 1>(numel, [=]GPU_LAMBDA(int idx) {
       void* out = data[0] + strides[0] * idx;
-      arg0_t result = invoke(f, &data.data[1], &strides.data[1], &dtypes.data[1], idx);
+      arg0_t result = invoke(f, &data[1], &strides[1], &dtypes[1], idx);
       c10::cast_and_store<arg0_t>(dtypes[0], out, result);
     });
 #else
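Nothing else in the ROCm path has to change: std::array is trivially copyable, so the [=] capture still copies data, strides, and dtypes by value into the device lambda exactly as at::detail::Array was copied. A minimal sketch of the capture pattern, with a plain loop standing in for the kernel launch (launch_sketch and example are hypothetical names):

    #include <array>

    // Illustrative only: a trivially copyable std::array can be captured
    // by value into a kernel lambda, just like the old at::detail::Array.
    template <typename func_t>
    void launch_sketch(int numel, const func_t& f) {
      for (int idx = 0; idx < numel; idx++) f(idx);  // stand-in for a GPU launch
    }

    void example(char* base, std::array<int, 2> strides) {
      std::array<char*, 2> data{base, base + 64};
      launch_sketch(4, [=](int idx) {
        // data and strides were copied into the closure by value.
        char* p = data[0] + strides[0] * idx;
        (void)p;
      });
    }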
@@ -368,7 +368,7 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
         storer);
 #endif
   } else {
-    at::detail::Array<ScalarType, ntensors> dtypes;
+    std::array<ScalarType, ntensors> dtypes;
     for (int i = 0; i < ntensors; i++) {
       dtypes[i] = iter.dtype(i);
     }
@@ -376,7 +376,7 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
     launch_legacy_kernel<128, 4>(numel, [=] GPU_LAMBDA(int idx) {
       auto offsets = offset_calc.get(idx);
       void* out = data[0] + offsets[0];
-      arg0_t result = invoke(f, &data.data[1], &offsets.data[1], &dtypes.data[1], 1);
+      arg0_t result = invoke(f, &data[1], &offsets[1], &dtypes[1], 1);
       c10::cast_and_store<arg0_t>(dtypes[0], out, result);
     });
   }
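For readers unfamiliar with this dynamic-cast path: c10::cast_and_store<arg0_t> writes the statically-typed result into an output buffer whose element type is only known at runtime. A hypothetical simplification of the idea, not the actual c10 implementation:

    #include <cstdint>

    enum class ScalarTypeSketch { Float, Double, Int };

    // Hypothetical simplification of what c10::cast_and_store does:
    // dispatch on the runtime dtype and store the converted value at ptr.
    template <typename src_t>
    void cast_and_store_sketch(ScalarTypeSketch dtype, void* ptr, src_t value) {
      switch (dtype) {
        case ScalarTypeSketch::Float:  *static_cast<float*>(ptr)   = static_cast<float>(value);   break;
        case ScalarTypeSketch::Double: *static_cast<double*>(ptr)  = static_cast<double>(value);  break;
        case ScalarTypeSketch::Int:    *static_cast<int32_t*>(ptr) = static_cast<int32_t>(value); break;
      }
    }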
@@ -2,6 +2,7 @@
 #include <ATen/native/cuda/IndexKernel.h>
 #include <ATen/native/IndexKernel.h>
 
+#include <array>
 #include <type_traits>
 #include <ATen/core/TensorBase.h>
 #include <ATen/Dispatch.h>
@@ -68,9 +69,9 @@ void gpu_index_kernel(TensorIteratorBase& iter, const IntArrayRef index_size, co
     return;
   }
 
-  auto sizes = at::detail::Array<int64_t, MAX_DIMS>(0);
-  auto strides = at::detail::Array<int64_t, MAX_DIMS>(0);
-  auto index_ptrs = at::detail::Array<char*, MAX_DIMS>(nullptr);
+  auto sizes = std::array<int64_t, MAX_DIMS>{};
+  auto strides = std::array<int64_t, MAX_DIMS>{};
+  auto index_ptrs = std::array<char*, MAX_DIMS>{};
   for (unsigned i = 0; i < num_indices; i++) {
     sizes[i] = index_size[i];
     strides[i] = index_stride[i];
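As with the Philox counter above, the fill constructors (0) and (nullptr) are replaced by value-initialization: std::array<T, N>{} zeroes arithmetic elements and nulls pointer elements, so the entries past num_indices that the loop never writes keep well-defined values. An illustrative check:

    #include <array>
    #include <cassert>
    #include <cstdint>

    int main() {
      constexpr int MAX_DIMS = 4;  // placeholder value for illustration
      auto sizes = std::array<int64_t, MAX_DIMS>{};
      auto index_ptrs = std::array<char*, MAX_DIMS>{};
      assert(sizes[MAX_DIMS - 1] == 0);             // zero-filled
      assert(index_ptrs[MAX_DIMS - 1] == nullptr);  // null-filled
      return 0;
    }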
@@ -7,8 +7,8 @@
 #include <ATen/detail/CUDAHooksInterface.h>
 #include <ATen/native/SpectralOpsUtils.h>
 
+#include <array>
 #include <cmath>
 #include <vector>
-
 
 namespace at::native {
@@ -17,7 +17,7 @@ namespace at::native {
 // In mirrored dims, maps linear index i to (n - i) % n
 template <typename index_t>
 struct HermitianSymmetryOffsetCalculator {
-  using offset_type = at::detail::Array<index_t, 1>;
+  using offset_type = std::array<index_t, 1>;
   using dim_type = std::remove_cv_t<decltype(MAX_DIMS)>;
   dim_type dims;
   at::cuda::detail::IntDivider<index_t> sizes_[MAX_DIMS];
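offset_type is the calculator's return type, and callers only index the result, so the swap to std::array<index_t, 1> is source-compatible. A simplified stand-in for the interface (OffsetCalcSketch and its single-dimension get are hypothetical; the real calculator handles up to MAX_DIMS mirrored dims):

    #include <array>
    #include <cstdint>

    // Simplified stand-in for the offset calculator's interface: get(i)
    // returns a one-element array holding the mirrored offset.
    struct OffsetCalcSketch {
      using offset_type = std::array<int64_t, 1>;
      int64_t n;  // size of the mirrored dimension (illustrative)
      offset_type get(int64_t i) const {
        // In mirrored dims, linear index i maps to (n - i) % n.
        return offset_type{(n - i) % n};
      }
    };

    int main() {
      OffsetCalcSketch calc{8};
      auto offsets = calc.get(3);
      return offsets[0] == 5 ? 0 : 1;  // callers index the result as before
    }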
@@ -1,9 +1,9 @@
+#include <array>
 #include <gtest/gtest.h>
 #include <ATen/ATen.h>
 #include <ATen/native/cuda/Loops.cuh>
 #include <ATen/native/cuda/MemoryAccess.cuh>
 #include <ATen/cuda/CUDAContext.h>
-#include <ATen/core/Array.h>
 
 using namespace at::native;
 using namespace at::native::memory;
@@ -77,7 +77,7 @@ TEST(TestVectorizedMemoryAccess, CanVectorizeUpTo) {
 template <typename scalar_t, int vec_size>
 __global__ void vectorized_copy(scalar_t *dst, scalar_t *src) {
   static_assert(vec_size <= thread_work_size() && thread_work_size() % vec_size == 0, "Invalid vec_size");
-  using array_t = at::detail::Array<char*, 2>;
+  using array_t = std::array<char*, 2>;
   array_t data;
   data[0] = reinterpret_cast<char *>(dst);
   data[1] = reinterpret_cast<char *>(src);
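A note on why this compiles in a __global__ function: std::array is an aggregate and its member functions are constexpr since C++17, so under the relaxed-constexpr device compilation mode that PyTorch builds with, the kernel body is equivalent to the old at::detail::Array version. A minimal device-side sketch (copy2 is a hypothetical kernel name):

    #include <array>

    // Minimal device-side sketch: std::array works in CUDA device code
    // when constexpr functions are usable on device (PyTorch builds with
    // the relaxed-constexpr option).
    __global__ void copy2(char* dst, char* src) {
      std::array<char*, 2> data{dst, src};
      data[0][0] = data[1][0];  // operator[] is constexpr since C++17
    }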