Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[Lint] Update clang-format to 19.1.4 (#153889)

All changes other than the one to `tools/linter/adapters/s3_init_config.json` are generated by newer clang-format.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/153889
Approved by: https://github.com/cyyever, https://github.com/atalman

Committed by: PyTorch MergeBot
Parent: d869ea11e0
Commit: c4d1ff02f8
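The only file edited by hand is tools/linter/adapters/s3_init_config.json, which pins the clang-format binary per platform by download URL and SHA-256 hash (see the hunk for that file further down). As a rough illustration of what those hash fields are for (this is a hedged sketch, not the repository's actual s3_init logic, and the local file path below is hypothetical), a downloaded binary could be checked like this:

import hashlib

# Pinned SHA-256 for the Linux clang-format 19.1.4 binary, copied from the
# s3_init_config.json hunk in this commit.
EXPECTED_SHA256 = "bfa9ef6eccb372f79ffcb6196af966fd84519ea9567f5ae7b6ad30208cd82109"

def sha256_of(path):
    # Hash the file in 1 MiB chunks so large binaries do not need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

actual = sha256_of("clang-format")  # hypothetical path to the downloaded binary
if actual != EXPECTED_SHA256:
    raise SystemExit("clang-format binary does not match the pinned hash: " + actual)
print("clang-format binary matches the pinned hash")

The hunks below are the mechanical reformatting produced by clang-format 19.1.4.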
@@ -395,8 +395,7 @@ class Vectorized<double> {
})} // Comparison using the _CMP_**_OQ predicate.
// `O`: get false if an operand is NaN
// `Q`: do not raise if an operand is NaN
-Vectorized<double>
-operator==(const Vectorized<double>& other) const {
+Vectorized<double> operator==(const Vectorized<double>& other) const {
svbool_t mask = svcmpeq_f64(ptrue, values, other);
return svsel_f64(mask, ALL_F64_TRUE_MASK, ALL_F64_FALSE_MASK);
}
@@ -497,8 +497,7 @@ class Vectorized<float> {
})} // Comparison using the _CMP_**_OQ predicate.
// `O`: get false if an operand is NaN
// `Q`: do not raise if an operand is NaN
-Vectorized<float>
-operator==(const Vectorized<float>& other) const {
+Vectorized<float> operator==(const Vectorized<float>& other) const {
svbool_t mask = svcmpeq_f32(ptrue, values, other);
return svsel_f32(mask, ALL_F32_TRUE_MASK, ALL_F32_FALSE_MASK);
}
@@ -97,14 +97,14 @@ class Vectorized<float> {
const Vectorized<float>& a,
const Vectorized<float>& b) {
Vectorized<float> vec;
-vec.values =
-BlendRegs<0, (mask & 0x01) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<1, (mask & 0x02) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<2, (mask & 0x04) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<3, (mask & 0x08) != 0>::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 0,
+(mask & 0x01) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 1,
+(mask & 0x02) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 2,
+(mask & 0x04) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 3,
+(mask & 0x08) != 0 > ::impl(a.values, b.values, vec.values);
return vec;
}
static Vectorized<float> blendv(
@@ -13,8 +13,7 @@ inline namespace CPU_CAPABILITY {
template <
typename VecT,
typename ValueT,
-template <int, bool>
-typename BlendRegs,
+template <int, bool> typename BlendRegs,
typename Derived>
struct Vectorized16 {
protected:
@@ -54,23 +53,23 @@ struct Vectorized16 {
template <int64_t mask>
static Derived blend(const Derived& a, const Derived& b) {
Derived vec;
-vec.values =
-BlendRegs<0, (mask & 0x01) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<1, (mask & 0x02) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<2, (mask & 0x04) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<3, (mask & 0x08) != 0>::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 0,
+(mask & 0x01) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 1,
+(mask & 0x02) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 2,
+(mask & 0x04) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 3,
+(mask & 0x08) != 0 > ::impl(a.values, b.values, vec.values);

-vec.values =
-BlendRegs<4, (mask & 0x10) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<5, (mask & 0x20) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<6, (mask & 0x40) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<7, (mask & 0x80) != 0>::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 4,
+(mask & 0x10) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 5,
+(mask & 0x20) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 6,
+(mask & 0x40) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 7,
+(mask & 0x80) != 0 > ::impl(a.values, b.values, vec.values);

return vec;
}
@@ -780,8 +780,8 @@ void MPSProfiler::handleIntSignal(int signal) {
}

// used to capture sigint signal to log profiling stats
-struct sigaction MPSProfiler::currentSigint {};
-struct sigaction MPSProfiler::previousSigint {};
+struct sigaction MPSProfiler::currentSigint{};
+struct sigaction MPSProfiler::previousSigint{};

bool MPSProfiler::isCapturing() const {
return [captureManager isCapturing];
@@ -216,7 +216,7 @@ struct RNNDescriptorParams {
cudnnDataType_t datatype,
cudnnDataType_t input_datatype) {
#endif
-this->set_mode(mode);
+this -> set_mode(mode);
#ifdef USE_CUDNN_RNN_V8_API
this->input_size = input_size;
this->packed = packed;
@@ -153,12 +153,12 @@ kernel void searchsorted(
constant INPUT_T * data_in [[buffer(0)]], \
constant INPUT_T * data_bd [[buffer(1)]], \
device OUTPUT_T * data_out [[buffer(2)]], \
-constant int64_t & idim_in [[buffer(3)]], \
-constant int64_t & idim_bd [[buffer(4)]], \
-constant int64_t & numel_in [[buffer(5)]], \
-constant int64_t & right [[buffer(6)]], \
-constant int64_t & is_1d_boundaries [[buffer(7)]], \
-constant int64_t * data_sort [[buffer(8)]], \
+constant int64_t& idim_in [[buffer(3)]], \
+constant int64_t& idim_bd [[buffer(4)]], \
+constant int64_t& numel_in [[buffer(5)]], \
+constant int64_t& right [[buffer(6)]], \
+constant int64_t& is_1d_boundaries [[buffer(7)]], \
+constant int64_t* data_sort [[buffer(8)]], \
uint2 tgid [[threadgroup_position_in_grid]], \
uint2 tid2 [[thread_position_in_threadgroup]], \
uint2 tptg [[threads_per_threadgroup]]); \
@@ -167,11 +167,11 @@ kernel void searchsorted(
constant INPUT_T * data_in [[buffer(0)]], \
constant INPUT_T * data_bd [[buffer(1)]], \
device OUTPUT_T * data_out [[buffer(2)]], \
-constant int64_t & idim_in [[buffer(3)]], \
-constant int64_t & idim_bd [[buffer(4)]], \
-constant int64_t & numel_in [[buffer(5)]], \
-constant int64_t & right [[buffer(6)]], \
-constant int64_t & is_1d_boundaries [[buffer(7)]], \
+constant int64_t& idim_in [[buffer(3)]], \
+constant int64_t& idim_bd [[buffer(4)]], \
+constant int64_t& numel_in [[buffer(5)]], \
+constant int64_t& right [[buffer(6)]], \
+constant int64_t& is_1d_boundaries [[buffer(7)]], \
uint2 tgid [[threadgroup_position_in_grid]], \
uint2 tid2 [[thread_position_in_threadgroup]], \
uint2 tptg [[threads_per_threadgroup]]);
@@ -94,21 +94,21 @@ kernel void histogramdd(
}
}

#define REGISTER_HISTOGRAMDD_OP(DTYPE) \
template [[host_name("histogramdd_" #DTYPE)]] kernel void \
histogramdd<DTYPE>( \
constant DTYPE * input_ [[buffer(0)]], \
constant DTYPE * weight [[buffer(1)]], \
device DTYPE * local_out [[buffer(2)]], \
constant uint * offsets [[buffer(3)]], \
-constant size_t & num_dims [[buffer(4)]], \
-constant DTYPE * bin_seq [[buffer(5)]], \
-constant int64_t * num_bin_edges [[buffer(6)]], \
-constant DTYPE * leftmost_edge [[buffer(7)]], \
-constant DTYPE * rightmost_edge [[buffer(8)]], \
-constant int64_t * local_out_strides [[buffer(9)]], \
-constant uint8_t & bin_selection_algorithm [[buffer(10)]], \
-constant uint8_t & has_weight [[buffer(11)]], \
+constant size_t& num_dims [[buffer(4)]], \
+constant DTYPE* bin_seq [[buffer(5)]], \
+constant int64_t* num_bin_edges [[buffer(6)]], \
+constant DTYPE* leftmost_edge [[buffer(7)]], \
+constant DTYPE* rightmost_edge [[buffer(8)]], \
+constant int64_t* local_out_strides [[buffer(9)]], \
+constant uint8_t& bin_selection_algorithm [[buffer(10)]], \
+constant uint8_t& has_weight [[buffer(11)]], \
uint tid [[thread_position_in_grid]]);

REGISTER_HISTOGRAMDD_OP(float);
@@ -209,11 +209,11 @@ kernel void triu_indices(
template [[host_name(#NAME "_indices_" #DTYPE)]] kernel void \
NAME##_indices<DTYPE>( \
device DTYPE * tensor, \
-constant int64_t & col_offset, \
-constant int64_t & m_first_row, \
-constant int64_t & col, \
-constant int64_t & rectangle_size, \
-constant int64_t & triu_size, \
+constant int64_t& col_offset, \
+constant int64_t& m_first_row, \
+constant int64_t& col, \
+constant int64_t& rectangle_size, \
+constant int64_t& triu_size, \
uint linear_index [[thread_position_in_grid]])

INSTANTIATE_TRI_INDICES(triu, long);
@@ -88,15 +88,13 @@ Tensor unsqueeze(const at::Tensor& self, int64_t dim) {
}

// Create the params buffer
-struct Block block {
-{
+struct Block block{{
// Dimension to unsqueeze
static_cast<int32_t>(dim),
// Keep track of the channel in Image3D
static_cast<int32_t>(
std::ceil(static_cast<float>(output_size[channel_index]) / 4)),
-}
-};
+}};

api::UniformParamsBuffer params(context, block);

@@ -193,11 +193,11 @@ struct C10_API AutogradMetaFactory {
C10_API void SetAutogradMetaFactory(AutogradMetaFactory* factory);
C10_API AutogradMetaFactory* GetAutogradMetaFactory();

-struct C10_API AutogradMetaFactoryRegisterer {
-explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory* factory) {
+struct C10_API AutogradMetaFactoryRegisterer{
+explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory * factory){
SetAutogradMetaFactory(factory);
-}
-};
+} // namespace impl
+}; // namespace c10

} // namespace impl
@@ -286,7 +286,7 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
#define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \
((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \
? (blocks_per_sm) \
-: ((CUDA_MAX_THREADS_PER_SM + (threads_per_block)-1) / \
+: ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / \
(threads_per_block))))
// C10_LAUNCH_BOUNDS is analogous to __launch_bounds__
#define C10_LAUNCH_BOUNDS_0 \
@@ -68,8 +68,7 @@ static_assert(
} // namespace test_function_traits

struct MovableOnly {
-constexpr MovableOnly(int val_) : val(val_) { /* no default constructor */
-}
+constexpr MovableOnly(int val_) : val(val_) { /* no default constructor */ }
MovableOnly(const MovableOnly&) = delete;
MovableOnly(MovableOnly&&) = default;
MovableOnly& operator=(const MovableOnly&) = delete;
@@ -5,20 +5,20 @@

#if !defined(_WIN32)
static bool file_exists(const char* path) {
-struct stat st {};
+struct stat st{};
return stat(path, &st) == 0 && S_ISREG(st.st_mode);
}
static bool directory_exists(const char* path) {
-struct stat st {};
+struct stat st{};
return stat(path, &st) == 0 && S_ISDIR(st.st_mode);
}
#else
static bool file_exists(const char* path) {
-struct _stat st {};
+struct _stat st{};
return _stat(path, &st) == 0 && ((st.st_mode & _S_IFMT) == _S_IFREG);
}
static bool directory_exists(const char* path) {
-struct _stat st {};
+struct _stat st{};
return _stat(path, &st) == 0 && ((st.st_mode & _S_IFMT) == _S_IFDIR);
}
#endif // !defined(_WIN32)
@@ -59,7 +59,7 @@ inline time_t getTime(bool allow_monotonic = false) {
.count();
#else
// clock_gettime is *much* faster than std::chrono implementation on Linux
-struct timespec t {};
+struct timespec t{};
auto mode = CLOCK_REALTIME;
if (allow_monotonic) {
mode = CLOCK_MONOTONIC;
@@ -116,8 +116,8 @@ class C10_API Error : public std::exception {

class C10_API Warning {
public:
-class C10_API UserWarning {};
-class C10_API DeprecationWarning {};
+class C10_API UserWarning{};
+class C10_API DeprecationWarning{};

using warning_variant_t = std::variant<UserWarning, DeprecationWarning>;

@@ -70,7 +70,9 @@ class LeftRight final {

~LeftRight() {
// wait until any potentially running writers are finished
-{ std::unique_lock<std::mutex> lock(_writeMutex); }
+{
+std::unique_lock<std::mutex> lock(_writeMutex);
+}

// wait until any potentially running readers are finished
while (_counters[0].load() != 0 || _counters[1].load() != 0) {
@@ -370,9 +370,9 @@ class SmallVectorTemplateCommon
/// note
template <
typename T,
-bool = (std::is_trivially_copy_constructible_v<T>)&&(
-std::is_trivially_move_constructible_v<
-T>)&&std::is_trivially_destructible_v<T>>
+bool = (std::is_trivially_copy_constructible_v<T>) &&
+(std::is_trivially_move_constructible_v<T>) &&
+std::is_trivially_destructible_v<T>>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
friend class SmallVectorTemplateCommon<T>;

@@ -59,7 +59,7 @@ void hookupHandler() {
if (hookedUpCount++) {
return;
}
-struct sigaction sa {};
+struct sigaction sa{};
// Setup the handler
sa.sa_handler = &handleSignal;
// Restart the system call, if at all possible
@@ -80,7 +80,7 @@ void unhookHandler() {
if (--hookedUpCount > 0) {
return;
}
-struct sigaction sa {};
+struct sigaction sa{};
// Setup the sighub handler
sa.sa_handler = SIG_DFL;
// Restart the system call, if at all possible
@@ -273,7 +273,7 @@ void FatalSignalHandler::installFatalSignalHandlers() {
return;
}
fatalSignalHandlersInstalled = true;
-struct sigaction sa {};
+struct sigaction sa{};
sigemptyset(&sa.sa_mask);
// Since we'll be in an exiting situation it's possible there's memory
// corruption, so make our own stack just in case.
@@ -88,7 +88,7 @@ class C10_API FatalSignalHandler {
bool fatalSignalHandlersInstalled;
// We need to hold a reference to call the previous SIGUSR2 handler in case
// we didn't signal it
-struct sigaction previousSigusr2 {};
+struct sigaction previousSigusr2{};
// Flag dictating whether the SIGUSR2 handler falls back to previous handlers
// or is intercepted in order to print a stack trace.
std::atomic<bool> fatalSignalReceived;
@@ -87,7 +87,7 @@ struct DummyChunkDataReader : public datasets::ChunkDataReader<int> {
return chunk_count_;
};

-void reset() override{};
+void reset() override {};

const static size_t chunk_count_ = 3;
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,cppcoreguidelines-avoid-c-arrays)
@@ -1479,7 +1479,7 @@ TEST(DataLoaderTest, StatefulDatasetWithNoWorkers) {
void reset() override {
counter = 0;
}
-void save(torch::serialize::OutputArchive& archive) const override{};
+void save(torch::serialize::OutputArchive& archive) const override {};
void load(torch::serialize::InputArchive& archive) override {}
int counter = 0;
};
@@ -1517,7 +1517,7 @@ TEST(DataLoaderTest, StatefulDatasetWithManyWorkers) {
void reset() override {
counter = 0;
}
-void save(torch::serialize::OutputArchive& archive) const override{};
+void save(torch::serialize::OutputArchive& archive) const override {};
void load(torch::serialize::InputArchive& archive) override {}
int counter = 0;
std::mutex mutex;
@@ -1556,7 +1556,7 @@ TEST(DataLoaderTest, StatefulDatasetWithMap) {
void reset() override {
counter = 0;
}
-void save(torch::serialize::OutputArchive& archive) const override{};
+void save(torch::serialize::OutputArchive& archive) const override {};
void load(torch::serialize::InputArchive& archive) override {}
int counter = 0;
};
@@ -1605,7 +1605,7 @@ TEST(DataLoaderTest, StatefulDatasetWithCollate) {
void reset() override {
counter = 0;
}
-void save(torch::serialize::OutputArchive& archive) const override{};
+void save(torch::serialize::OutputArchive& archive) const override {};
void load(torch::serialize::InputArchive& archive) override {}
int counter = 0;
};
@@ -1747,7 +1747,7 @@ TEST(DataLoaderTest, ChunkDataSetWithEmptyBatch) {
return 1;
};

-void reset() override{};
+void reset() override {};
};

const size_t prefetch_count = 1;
@@ -1791,7 +1791,7 @@ TEST(DataLoaderTest, ChunkDataSetGetBatchWithUnevenBatchSize) {
return 2;
};

-void reset() override{};
+void reset() override {};
};

// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
@@ -1936,7 +1936,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) {
return chunk_count_;
};

-void reset() override{};
+void reset() override {};
BatchType batch_data_ = BatchType(chunk_size, 0);
};

@@ -2115,7 +2115,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {

class S : public samplers::Sampler<> {
public:
-explicit S(size_t size) : size_(size), index_(0){};
+explicit S(size_t size) : size_(size), index_(0) {};

void reset(std::optional<size_t> new_size = std::nullopt) override {
if (new_size.has_value()) {
@@ -2170,7 +2170,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
return chunk_count_;
};

-void reset() override{};
+void reset() override {};
size_t chunk_count_;
};

@@ -2258,7 +2258,7 @@ TEST(DataLoaderTest, CustomPreprocessPolicy) {
return chunk_count_;
};

-void reset() override{};
+void reset() override {};
size_t chunk_count_;
};
@@ -127,9 +127,7 @@ TEST_F(ModulesTest, Conv2dSameStrided) {
[&] { Conv2d model_invalid(options.stride(2)); }(),
"padding='same' is not supported for strided convolutions");
ASSERT_THROWS_WITH(
-[&] {
-Conv2d model_invalid(options.stride({1, 2}));
-}(),
+[&] { Conv2d model_invalid(options.stride({1, 2})); }(),
"padding='same' is not supported for strided convolutions");
}

@@ -181,9 +179,7 @@ TEST_F(ModulesTest, Conv3dSameStrided) {
[&] { Conv3d model_invalid(options.stride(2)); }(),
"padding='same' is not supported for strided convolutions");
ASSERT_THROWS_WITH(
-[&] {
-Conv3d model_invalid(options.stride({1, 2, 1}));
-}(),
+[&] { Conv3d model_invalid(options.stride({1, 2, 1})); }(),
"padding='same' is not supported for strided convolutions");
}

@@ -920,7 +920,9 @@ TEST(TensorTest, Arange) {
}

TEST(TensorTest, PrettyPrintTensorDataContainer) {
-{ ASSERT_EQ(c10::str(torch::detail::TensorDataContainer(1.1)), "1.1"); }
+{
+ASSERT_EQ(c10::str(torch::detail::TensorDataContainer(1.1)), "1.1");
+}
{
ASSERT_EQ(
c10::str(torch::detail::TensorDataContainer({1.1, 2.2})), "{1.1, 2.2}");
@@ -1033,9 +1033,8 @@ TEST(ContainerAliasingTest, MovesAcrossContainedWrites) {
auto ops = torch::RegisterOperators().op(
"uses::list",
torch::RegisterOperators::options()
-.catchAllKernel([](torch::List<at::Tensor> in) {
-return torch::rand({2, 3});
-})
+.catchAllKernel(
+[](torch::List<at::Tensor> in) { return torch::rand({2, 3}); })
.aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
// Write to the inside of a list. Check that we can't reorder a
// print across it.
@@ -1073,9 +1072,8 @@ TEST(ContainerAliasingTest, MovesAcrossContainedWritesNested) {
auto ops = torch::RegisterOperators().op(
"uses::list",
torch::RegisterOperators::options()
-.catchAllKernel([](torch::List<at::Tensor> in) {
-return torch::rand({2, 3});
-})
+.catchAllKernel(
+[](torch::List<at::Tensor> in) { return torch::rand({2, 3}); })
.aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
// Write to the inside of a list. Check that we can't reorder a
// print across it.
@@ -1257,9 +1255,8 @@ TEST(AliasRegistrationTest, ConservativeWithInferredSchema) {
auto registry = torch::RegisterOperators().op(
"foo::rand1",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
const auto rand_op = Symbol::fromQualString("foo::rand1");
auto graph = std::make_shared<Graph>();
@@ -1274,9 +1271,8 @@ TEST(AliasRegistrationTest, ConservativeWithSpecifiedSchema) {
auto registry = torch::RegisterOperators().op(
"foo::rand2(Tensor arg1) -> Tensor",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
const auto rand_op = Symbol::fromQualString("foo::rand2");
auto graph = std::make_shared<Graph>();
@@ -1291,9 +1287,8 @@ TEST(AliasRegistrationTest, ConservativeWithAliasingAnnotationsShouldError) {
auto registry = torch::RegisterOperators().op(
"foo::rand3(Tensor(a) arg1) -> Tensor(b)",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));

const auto rand_op = Symbol::fromQualString("foo::rand3");
@@ -1312,9 +1307,8 @@ TEST(AliasRegistrationTest, ConservativeWithAliasingAnnotationsShouldError2) {
auto registry = torch::RegisterOperators().op(
"foo::rand4(Tensor(a) arg1) -> Tensor(a)",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
const auto rand_op = Symbol::fromQualString("foo::rand4");
auto graph = std::make_shared<Graph>();
@@ -1334,9 +1328,8 @@ TEST(AliasRegistrationTest, FromSchemaWithInferredSchemaShouldError) {
torch::RegisterOperators().op(
"foo::rand5",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA));
},
"Tried to register operator foo::rand5(Tensor _0) -> Tensor _0 with AliasAnalysisKind::FROM_SCHEMA, but the schema is inferred");
@@ -1346,9 +1339,8 @@ TEST(AliasRegistrationTest, FromSchemaInferredPure) {
auto registry = torch::RegisterOperators().op(
"foo::rand6(Tensor arg1) -> Tensor",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA));
const auto rand_op = Symbol::fromQualString("foo::rand6");
auto graph = std::make_shared<Graph>();
@@ -1395,9 +1387,8 @@ TEST(AliasRegistrationTest, PureNoSchema) {
auto registry = torch::RegisterOperators().op(
"foo::rand9",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
const auto rand_op = Symbol::fromQualString("foo::rand9");
auto graph = std::make_shared<Graph>();
@@ -1412,9 +1403,8 @@ TEST(AliasRegistrationTest, PureWithSchema) {
auto registry = torch::RegisterOperators().op(
"foo::rand10(Tensor arg1) -> Tensor",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
const auto rand_op = Symbol::fromQualString("foo::rand10");
auto graph = std::make_shared<Graph>();
|
@ -121,14 +121,10 @@ TEST(AutodiffTest, ADFormulas) {
|
|||||||
{"t", unary_pointwise_2d, [](const VL& v) -> VL { return {v[0].t()}; }},
|
{"t", unary_pointwise_2d, [](const VL& v) -> VL { return {v[0].t()}; }},
|
||||||
{"view",
|
{"view",
|
||||||
unary_pointwise_2d,
|
unary_pointwise_2d,
|
||||||
[](const VL& v) -> VL {
|
[](const VL& v) -> VL { return {v[0].view({3, 2})}; }},
|
||||||
return {v[0].view({3, 2})};
|
|
||||||
}},
|
|
||||||
{"expand",
|
{"expand",
|
||||||
{{2, 1}},
|
{{2, 1}},
|
||||||
[](const VL& v) -> VL {
|
[](const VL& v) -> VL { return {v[0].expand({2, 3})}; }},
|
||||||
return {v[0].expand({2, 3})};
|
|
||||||
}},
|
|
||||||
{"mm",
|
{"mm",
|
||||||
{{10, 12}, {12, 15}},
|
{{10, 12}, {12, 15}},
|
||||||
[](const VL& v) -> VL { return {v[0].mm(v[1])}; }},
|
[](const VL& v) -> VL { return {v[0].mm(v[1])}; }},
|
||||||
|
@ -863,8 +863,12 @@ void checkScopeCallbacks() {
|
|||||||
|
|
||||||
{
|
{
|
||||||
RECORD_TORCHSCRIPT_FUNCTION("test_method", {});
|
RECORD_TORCHSCRIPT_FUNCTION("test_method", {});
|
||||||
{ RECORD_FUNCTION("test_function", {}); }
|
{
|
||||||
{ RECORD_USER_SCOPE("test_user_scope"); }
|
RECORD_FUNCTION("test_function", {});
|
||||||
|
}
|
||||||
|
{
|
||||||
|
RECORD_USER_SCOPE("test_user_scope");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TORCH_CHECK(!bad_scope);
|
TORCH_CHECK(!bad_scope);
|
||||||
@@ -1057,7 +1061,9 @@ TEST(RecordFunctionTest, RecordFunctionGuard) {
RECORD_USER_SCOPE("C");
}
}
-{ RECORD_USER_SCOPE("D"); }
+{
+RECORD_USER_SCOPE("D");
+}
}
}
TORCH_CHECK(fn_names.size() == 1);
@@ -1084,7 +1090,9 @@ TEST(RecordFunctionTest, Callbacks) {
add_remove_test_add_cb<2>();
auto h3 = add_remove_test_add_cb<3>();

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 3);
TORCH_CHECK(std::find(ids.begin(), ids.end(), 1) != ids.end());
@@ -1094,7 +1102,9 @@ TEST(RecordFunctionTest, Callbacks) {
ids.clear();
removeCallback(h1);

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 2);
TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
@@ -1103,7 +1113,9 @@ TEST(RecordFunctionTest, Callbacks) {
ids.clear();
removeCallback(h3);

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 1);
TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
@@ -1115,7 +1127,9 @@ TEST(RecordFunctionTest, Callbacks) {
ids.clear();
add_remove_test_add_cb<1>();

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 1);
TORCH_CHECK(ids[0] == 1);
@@ -1128,7 +1142,9 @@ TEST(RecordFunctionTest, Callbacks) {
return nullptr;
}));

-{ RECORD_USER_SCOPE("test_thread"); }
+{
+RECORD_USER_SCOPE("test_thread");
+}
});
th.join();
TORCH_CHECK(ids.size() == 2);
@@ -1136,7 +1152,9 @@ TEST(RecordFunctionTest, Callbacks) {
TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
ids.clear();

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 1);
TORCH_CHECK(ids[0] == 1);
@@ -1167,7 +1185,9 @@ TEST(RecordFunctionTest, Callbacks) {
TORCH_CHECK(ctx->b == "test_str");
}));

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 1);
TORCH_CHECK(ids[0] == 1);
@@ -1193,7 +1213,9 @@ TEST(RecordFunctionTest, Callbacks) {
}));

// Will call both global and thread local callbacks.
-{ RECORD_USER_SCOPE("test_thread"); }
+{
+RECORD_USER_SCOPE("test_thread");
+}
});
ctx_th.join();
TORCH_CHECK(ids.size() == 2);
@@ -1216,21 +1238,27 @@ TEST(RecordFunctionTest, ShouldRun) {
return nullptr;
}));

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

EXPECT_TRUE(ran) << "first run didn't happen";
ran = false;

disableCallback(handle);

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

EXPECT_FALSE(ran) << "second run happened but shouldn't have";
ran = false;

reenableCallback(handle);

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

EXPECT_TRUE(ran) << "run after re-enable didn't happen";
ran = false;
@@ -1273,7 +1301,9 @@ TEST(RecordFunctionTest, Basic) {
return nullptr;
})
.needsIds(true));
-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}
TORCH_CHECK(has_ids);
clearCallbacks();
has_ids = false;
@@ -1282,7 +1312,9 @@ TEST(RecordFunctionTest, Basic) {
has_ids = fn.handle() > 0;
return nullptr;
}));
-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}
TORCH_CHECK(!has_ids);
clearCallbacks();
}
@@ -144,7 +144,9 @@ TEST(RecordFunctionTest, CallOrder) {
#undef REGISTER_CALLBACK

RECORD_FUNCTION("Outer", {});
-{ RECORD_FUNCTION("Inner", {}); }
+{
+RECORD_FUNCTION("Inner", {});
+}

at::clearCallbacks();
ASSERT_FALSE(at::hasCallbacks());
@@ -716,14 +716,13 @@ TEST(ExternalCall, UnaryFloat) {
std::string,
std::vector<ExprHandle>>;
std::vector<Test> tests = {};
-tests.push_back(Test{// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
-{1, 64, 8, 9},
-{1, 64, 5, 7},
-[](at::Tensor x) {
-return at::adaptive_avg_pool2d(x, {5, 7});
-},
-"nnc_aten_adaptive_avg_pool2d",
-toExprHandleVec({5, 7})});
+tests.push_back(Test{
+// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
+{1, 64, 8, 9},
+{1, 64, 5, 7},
+[](at::Tensor x) { return at::adaptive_avg_pool2d(x, {5, 7}); },
+"nnc_aten_adaptive_avg_pool2d",
+toExprHandleVec({5, 7})});
tests.push_back(Test{// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
{100, 200},
{100},
@@ -8,16 +8,16 @@
],
"clang-format": {
"Darwin-arm": {
-"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-arm/17.0.6/clang-format",
-"hash": "47c47f3c8275fd6e25d07128ef9a655d3f898eae6a59a7c7a801967871bdb2f7"
+"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-arm/19.1.4/clang-format",
+"hash": "f0da3ecf0ab1e9b50e8c27bd2d7ca0baa619e2f4b824b35d79d46356581fa552"
},
"Darwin-i386": {
-"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-i386/17.0.6/clang-format",
-"hash": "23423cbe62feb535c05c88e6f47e420ca2777603f90dff4d33d19b6f5177a79e"
+"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-i386/19.1.4/clang-format",
+"hash": "f5eb5037b9aa9d1d2de650fb2e0fe1a2517768a462fae8e98791a67b698302f4"
},
"Linux": {
-"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/17.0.6/clang-format",
-"hash": "920159a0fafc7c65f6819e8a0b739ecc8e655f50f20a3a1db975a3473b86431b"
+"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/19.1.4/clang-format",
+"hash": "bfa9ef6eccb372f79ffcb6196af966fd84519ea9567f5ae7b6ad30208cd82109"
}
},
"clang-tidy": {
@@ -37,7 +37,7 @@ using namespace torch;
auto _w = \
write(STDERR_FILENO, ERROR_MSG, sizeof(ERROR_MSG) / sizeof(char)); \
(void)_w; \
-struct sigaction sa {}; \
+struct sigaction sa{}; \
sa.sa_handler = SIG_DFL; \
sa.sa_flags = 0; \
if (sigemptyset(&sa.sa_mask) != 0 || \
@@ -54,7 +54,7 @@ static void setSignalHandler(
int signal,
void (*handler)(int, siginfo_t*, void*),
struct sigaction* old_sa_ptr) {
-struct sigaction sa {};
+struct sigaction sa{};
sa.sa_sigaction = handler;
sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_NOCLDSTOP | SA_NODEFER;
if (sigemptyset(&sa.sa_mask) != 0 ||
@@ -92,7 +92,7 @@ static void handler_SIGTERM(int sig, siginfo_t* info, void* ctx) {
if (info->si_pid == getppid()) {
_exit(EXIT_SUCCESS);
}
-struct sigaction sa {};
+struct sigaction sa{};
sa.sa_handler = SIG_DFL;
sa.sa_flags = 0;
if (sigemptyset(&sa.sa_mask) != 0 || sigaction(SIGTERM, &sa, nullptr) != 0) {
@@ -40,10 +40,9 @@ struct has_forward {

template <typename Head = void, typename... Tail>
constexpr bool check_not_lvalue_references() {
-return (
-!std::is_lvalue_reference_v<Head> ||
-std::is_const_v<std::remove_reference_t<
-Head>>)&&check_not_lvalue_references<Tail...>();
+return (!std::is_lvalue_reference_v<Head> ||
+std::is_const_v<std::remove_reference_t<Head>>) &&
+check_not_lvalue_references<Tail...>();
}

template <>
@@ -59,7 +59,7 @@ namespace {
// in data parallel, and should not be exposed as a user API.
struct ReduceAdd : public autograd::Node {
explicit ReduceAdd(const at::Device& destination_device)
-: destination_device_(destination_device){};
+: destination_device_(destination_device) {};
~ReduceAdd() override = default;

// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
@@ -71,7 +71,7 @@ template <
typename T,
typename ValueT,
typename ParamsT,
-ValueT ParamsT::*ptr,
+ValueT ParamsT::* ptr,
typename ConvertArgT,
PyObject* (*Convert)(ConvertArgT)>
static PyObject* getTupleAttr(PyObject* obj, void* _unused) {
@@ -93,7 +93,7 @@ template <
typename T,
typename ValueT,
typename ParamsT,
-ValueT ParamsT::*ptr,
+ValueT ParamsT::* ptr,
typename ConvertArgT,
PyObject* (*Convert)(ConvertArgT)>
static PyObject* getValueAttr(PyObject* obj, void* _unused) {
@@ -41,10 +41,10 @@ struct TORCH_API JitDecompInterface {
TORCH_API void setJitDecompImpl(JitDecompInterface* impl);
TORCH_API JitDecompInterface* getJitDecompImpl();

-struct TORCH_API JitDecompRegisterer {
-explicit JitDecompRegisterer(JitDecompInterface* impl) {
-setJitDecompImpl(impl);
+struct TORCH_API JitDecompRegisterer{explicit JitDecompRegisterer(
+JitDecompInterface * impl){setJitDecompImpl(impl);
+} // namespace torch::autograd::impl
}
-};
+;

} // namespace torch::autograd::impl
@@ -1625,7 +1625,7 @@ using setter = int (*)(PyObject*, PyObject*, void*);

namespace {

-template <PyObject* THPFunction::*ptr>
+template <PyObject* THPFunction::* ptr>
PyObject* getObject(PyObject* obj, void* _unused) {
auto self = (THPFunction*)obj;
PyObject* value = self->*ptr;
@@ -1636,7 +1636,7 @@ PyObject* getObject(PyObject* obj, void* _unused) {
return value;
}

-template <PyObject* THPFunction::*ptr>
+template <PyObject* THPFunction::* ptr>
int setObject(PyObject* obj, PyObject* value, void* _unused) {
auto self = (THPFunction*)obj;
if (value == Py_None) {
@@ -1648,13 +1648,13 @@ int setObject(PyObject* obj, PyObject* value, void* _unused) {
return 0;
}

-template <typename M, M THPFunction::*ptr, PyObject* (*Convert)(long)>
+template <typename M, M THPFunction::* ptr, PyObject* (*Convert)(long)>
PyObject* getMember(PyObject* obj, void* _unused) {
auto self = (THPFunction*)obj;
return Convert(self->*ptr);
}

-template <typename M, M autograd::Node::*ptr, PyObject* (*Convert)(long)>
+template <typename M, M autograd::Node::* ptr, PyObject* (*Convert)(long)>
PyObject* getImplMember(PyObject* obj, void* _unused) {
auto self = (THPFunction*)obj;
return Convert(self->cdata.*ptr);
@@ -37,7 +37,7 @@ class CUDASymmetricMemory : public SymmetricMemory {
int rank,
int world_size);

-~CUDASymmetricMemory() override{};
+~CUDASymmetricMemory() override {};

std::vector<void*> get_buffer_ptrs() override;
std::vector<void*> get_signal_pad_ptrs() override;
@@ -85,14 +85,16 @@ void IpcChannel::send_fd(int dst_pid, int fd) {
memset(cbuf, 0, sizeof(cbuf));

// Create message header
-struct msghdr msg {
+struct msghdr msg{
// destination socket address and size of it
// message content in msg_iov and number of such structs (1 in our case)
// auxiliary data with the value of fd and size of it
-.msg_name = (void*)&addr, .msg_namelen = sizeof(struct sockaddr_un),
-.msg_iov = &io, .msg_iovlen = 1, .msg_control = cbuf,
-.msg_controllen = sizeof(cbuf)
-};
+.msg_name = (void*)&addr,
+.msg_namelen = sizeof(struct sockaddr_un),
+.msg_iov = &io,
+.msg_iovlen = 1,
+.msg_control = cbuf,
+.msg_controllen = sizeof(cbuf)};

// This points to the first control message header
// With SCM_RIGHTS we let the kernel know that we are passing file
@@ -136,25 +136,26 @@ IMPL_REDUCE(CPU)
 IMPL_REDUCE(CUDA)
 IMPL_REDUCE(PrivateUse1)

 #define IMPL_BROADCAST(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   broadcast_##DEV( \
       at::TensorList tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       int64_t root_rank, \
       int64_t root_tensor, \
       bool asyncOp, \
       int64_t timeout) { \
     auto tensor_vec = tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> broadcast( \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->broadcast( \
         tensor_vec, \
         BroadcastOptions{ \
             root_rank, \
             root_tensor, \
             std::chrono::milliseconds(timeout), \
             asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(tensor_vec), work); \
   }

 IMPL_BROADCAST(CPU)
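The `IMPL_*` macro hunks in this file all follow the same pattern: the old output wrapped the member call as `getBackend(...) -> broadcast(`, with spaces around the arrow at the wrap point, while the new output breaks before the arrow and writes it without surrounding spaces. A small self-contained sketch of that wrapping style, with placeholder types rather than the real ProcessGroup API:

    #include <memory>

    struct Backend {
      void broadcast(int root) { (void)root; }
    };

    struct Registry {
      std::shared_ptr<Backend> getBackend(int /*device*/) {
        return std::make_shared<Backend>();
      }
    };

    int main() {
      Registry process_group;
      // Old wrap style:  process_group.getBackend(0) -> broadcast(0);
      // New wrap style breaks before the arrow and keeps it unspaced:
      process_group.getBackend(0)
          ->broadcast(0);
      return 0;
    }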
@@ -164,22 +165,25 @@ IMPL_BROADCAST(PrivateUse1)
 // Return input tensors as output tensors to make inplace allreduce look like
 // a functional API, so that make_fx can correctly build the dependencies in
 // the graph later.
 #define IMPL_ALLREDUCE(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   allreduce_##DEV( \
       at::TensorList tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       const c10::intrusive_ptr<ReduceOp>& reduce_op, \
       const std::optional<at::Tensor>& sparse_indices, \
       bool asyncOp, \
       int64_t timeout) { \
     auto tensor_vec = tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> allreduce( \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->allreduce( \
         tensor_vec, \
         AllreduceOptions{ \
-            *reduce_op.get(), std::chrono::milliseconds(timeout), asyncOp}); \
+            *reduce_op.get(), \
+            std::chrono::milliseconds(timeout), \
+            asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(tensor_vec), work); \
   }

 IMPL_ALLREDUCE(CPU)
@@ -217,10 +221,13 @@ IMPL_ALLREDUCE_COALESCED(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto input_tensors_vec = input_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> allgather( \
-        const_cast<std::vector<std::vector<at::Tensor>>&>(output_tensors), \
-        input_tensors_vec, \
-        AllgatherOptions{std::chrono::milliseconds(timeout), asyncOp}); \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->allgather( \
+                        const_cast<std::vector<std::vector<at::Tensor>>&>( \
+                            output_tensors), \
+                        input_tensors_vec, \
+                        AllgatherOptions{ \
+                            std::chrono::milliseconds(timeout), asyncOp}); \
     return std:: \
         tuple<std::vector<std::vector<at::Tensor>>, c10::intrusive_ptr<Work>>( \
             output_tensors, work); \
@@ -231,20 +238,21 @@ IMPL_ALLGATHER(CPU)
 IMPL_ALLGATHER(CUDA)
 IMPL_ALLGATHER(PrivateUse1)

 #define IMPL__ALLGATHER_BASE(DEV) \
   std::tuple<at::Tensor, c10::intrusive_ptr<Work>> _allgather_base_##DEV( \
       at::Tensor& output_tensor, \
       at::Tensor& input_tensor, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       bool asyncOp, \
       int64_t timeout) { \
-    auto work = \
-        process_group->getBackend(c10::DeviceType::DEV) -> _allgather_base( \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->_allgather_base( \
         output_tensor, \
         input_tensor, \
-        AllgatherOptions{std::chrono::milliseconds(timeout), asyncOp}); \
+        AllgatherOptions{ \
+            std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<at::Tensor, c10::intrusive_ptr<Work>>( \
         output_tensor, work); \
   }

 IMPL__ALLGATHER_BASE(CPU)
@@ -289,26 +297,27 @@ IMPL_ALLGATHER_INTO_TENSOR_COALESCED(CPU)
 IMPL_ALLGATHER_INTO_TENSOR_COALESCED(CUDA)
 IMPL_ALLGATHER_INTO_TENSOR_COALESCED(PrivateUse1)

 #define IMPL_REDUCE_SCATTER(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   reduce_scatter_##DEV( \
       const at::TensorList& output_tensors, \
       const std::vector<std::vector<at::Tensor>>& input_tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       const c10::intrusive_ptr<ReduceOp>& reduce_op, \
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
-    auto work = \
-        process_group->getBackend(c10::DeviceType::DEV) -> reduce_scatter( \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->reduce_scatter( \
         output_tensors_vec, \
-        const_cast<std::vector<std::vector<at::Tensor>>&>(input_tensors), \
+        const_cast<std::vector<std::vector<at::Tensor>>&>( \
+            input_tensors), \
         ReduceScatterOptions{ \
             *reduce_op.get(), \
             std::chrono::milliseconds(timeout), \
             asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         output_tensors_vec, work); \
   }

 IMPL_REDUCE_SCATTER(CPU)
@@ -324,7 +333,7 @@ IMPL_REDUCE_SCATTER(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto work = process_group->getBackend(c10::DeviceType::DEV) \
-                    -> _reduce_scatter_base( \
+                    ->_reduce_scatter_base( \
         output_tensor, \
         input_tensor, \
         ReduceScatterOptions{ \
@@ -393,11 +402,14 @@ IMPL_GATHER(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> scatter( \
+    auto work = \
+        process_group->getBackend(c10::DeviceType::DEV) \
+            ->scatter( \
         output_tensors_vec, \
-        const_cast<std::vector<std::vector<at::Tensor>>&>(input_tensors), \
+        const_cast<std::vector<std::vector<at::Tensor>>&>( \
+            input_tensors), \
         ScatterOptions{ \
             root_rank, std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(output_tensors_vec), work); \
   }
@@ -406,22 +418,24 @@ IMPL_SCATTER(CPU)
 IMPL_SCATTER(CUDA)
 IMPL_SCATTER(PrivateUse1)

 #define IMPL_ALLTOALL(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   alltoall_##DEV( \
       const at::TensorList& output_tensors, \
       const at::TensorList& input_tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
     auto input_tensors_vec = input_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> alltoall( \
+    auto work = \
+        process_group->getBackend(c10::DeviceType::DEV) \
+            ->alltoall( \
         output_tensors_vec, \
         input_tensors_vec, \
         AllToAllOptions{std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(output_tensors_vec), work); \
   }

 IMPL_ALLTOALL(CPU)
@@ -435,7 +435,7 @@ void socketInitialize() {
 // gracefully fall back to an alternative if it doesn't.
 bool doesHostnameResolveToUsableAddress(const std::string& hostname) {
   socketInitialize();
-  struct addrinfo hints {};
+  struct addrinfo hints{};
   hints.ai_family = AF_UNSPEC;
   hints.ai_socktype = SOCK_STREAM;
   struct addrinfo* result = nullptr;
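This hunk, and several that follow (`struct stat rc {};`, `struct stat statbuf {};`, `struct perf_event_attr attr {};`, and so on), all apply the same rule: the space between a variable name and an empty braced initializer is removed when the declaration spells out `struct`. A portable sketch of the rule with a stand-in type, since the real declarations depend on POSIX headers:

    struct Options {
      int verbosity = 0;
    };

    int main() {
      // Old output:  struct Options opts {};
      // New output:  struct Options opts{};
      struct Options opts{};
      return opts.verbosity;
    }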
@@ -155,7 +155,7 @@ class UvTcpSocket : public UvHandle {
   }

   void startRead() {
-    struct ::sockaddr_storage addr {};
+    struct ::sockaddr_storage addr{};
     int addrLen{sizeof(struct ::sockaddr_storage)};

     if (int err = uv_tcp_getpeername(
@@ -263,7 +263,7 @@ class UvTcpServer : public UvTcpSocket {
     auto res = c10::make_intrusive<UvTcpServer>(loop);
     res->handleReady();
     try {
-      struct sockaddr_storage addr {};
+      struct sockaddr_storage addr{};
       int uv_res = 0;
       if (useIpv6) {
         uv_res = uv_ip6_addr("::", port, (struct sockaddr_in6*)&addr);
@@ -40,5 +40,5 @@ RegisterHandler tracebackHandler{

       res.setContent(std::move(file_contents), "text/plain");
     }};
-}
+} // namespace
 } // namespace c10d::control_plane
@@ -631,8 +631,8 @@ PyObject* rpc_init(PyObject* _unused, PyObject* noargs) {
           py::call_guard<py::gil_scoped_release>())
       .def(
           "_get_device_map",
-          (DeviceMap(TensorPipeAgent::*)(const WorkerInfo& dst) const) &
-              TensorPipeAgent::getDeviceMap,
+          (DeviceMap(TensorPipeAgent::*)(const WorkerInfo& dst)
+               const)&TensorPipeAgent::getDeviceMap,
           py::call_guard<py::gil_scoped_release>())
       .def(
           "_get_backend_options",
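The `_get_device_map` binding uses a pointer-to-member-function cast to pick one overload of `getDeviceMap`; the updated formatter now glues the `&` to the closing parenthesis of that cast instead of leaving it dangling at the end of the line. A simplified, compilable sketch of the same disambiguation pattern (the `Agent` type here is a placeholder, not the real TensorPipeAgent):

    struct Agent {
      int getDeviceMap(int dst) const { return dst; }
      int getDeviceMap(int dst, int hint) const { return dst + hint; }
    };

    int main() {
      // The cast selects the single-argument const overload; note the '&'
      // attached directly to the cast, as in the new output above.
      auto fn = (int (Agent::*)(int) const)&Agent::getDeviceMap;
      Agent a;
      return (a.*fn)(0);
    }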
@@ -14,7 +14,7 @@ constexpr auto kInternalModule = "torch.distributed.rpc.internal";
 #define PROFILE_GIL_SCOPED_ACQUIRE \
   std::chrono::time_point<std::chrono::high_resolution_clock> startTime; \
   auto shouldProfileGIL = \
-      RpcAgent::getCurrentRpcAgent() -> isGILProfilingEnabled(); \
+      RpcAgent::getCurrentRpcAgent()->isGILProfilingEnabled(); \
   if (shouldProfileGIL) { \
     startTime = std::chrono::high_resolution_clock::now(); \
   } \
@@ -39,7 +39,7 @@ bool file_exists(const std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }
@@ -217,7 +217,7 @@ bool recursive_rmdir(const std::string& path) {
   }

   struct dirent* entry = nullptr;
-  struct stat statbuf {};
+  struct stat statbuf{};
   bool success = true;

   // Iterate through directory entries
@@ -17,7 +17,7 @@ bool file_exists(std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }
@@ -123,13 +123,13 @@ getAOTIModelRunnerRegistry();
 // To register a new external backend in AOTI one needs to create an instance of
 // this struct. It is not thread-safe. Becase it is expected to be called during
 // the initialization of the program.
-struct TORCH_API RegisterAOTIModelRunner {
-  RegisterAOTIModelRunner(
-      const std::string& name,
-      CreateAOTIModelRunnerFunc create_aoti_model_runner_fn) {
+struct TORCH_API RegisterAOTIModelRunner{RegisterAOTIModelRunner(
+    const std::string& name,
+    CreateAOTIModelRunnerFunc create_aoti_model_runner_fn){
     getAOTIModelRunnerRegistry()[name] = create_aoti_model_runner_fn;
-  }
-};
+} // namespace torch::inductor
+}
+;

 } // namespace torch::inductor
 #endif
@@ -87,7 +87,7 @@ bool file_exists(std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }
@@ -358,7 +358,7 @@ struct IndexValueVec {
     index = at::vec::VectorizedN<int64_t, NI>(0);
   };

-  IndexValueVec(){};
+  IndexValueVec() {};
 };

 template <
@@ -451,7 +451,7 @@ std::array<PyMethodDef, 2> StaticCudaLauncherMethods = {
 // We don't implement __new__ or __init__ because we're using it only as a
 // container for static methods.
 PyTypeObject StaticCudaLauncherType = {
     PyVarObject_HEAD_INIT(nullptr, 0)
     "torch._C._StaticCudaLauncher", // tp_name
     sizeof(PyObject), // tp_basicsize
     0, // tp_itemsize
@@ -45,12 +45,12 @@ TORCH_API void registerFusionBackend(
     at::Device::Type backend_type,
     FusedKernelConstructor ctor);
 TORCH_API bool hasFusionBackend(at::Device::Type backend_type);
-struct TORCH_API RegisterFusionBackend {
-  RegisterFusionBackend(
-      at::Device::Type backend_type,
-      FusedKernelConstructor ctor) {
+struct TORCH_API RegisterFusionBackend{RegisterFusionBackend(
+    at::Device::Type backend_type,
+    FusedKernelConstructor ctor){
     registerFusionBackend(backend_type, std::move(ctor));
-  }
-};
+} // namespace torch::jit::fuser
+}
+;

 } // namespace torch::jit::fuser
@@ -134,7 +134,7 @@ static inline std::tuple<std::shared_ptr<char>, size_t> get_file_content(
     // failed to open file, chances are it's no such file or directory.
     file_not_found_error();
   }
-  struct stat statbuf {};
+  struct stat statbuf{};
   fstat(fd, &statbuf);
   size_t size = statbuf.st_size;
   void* ptr = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
@@ -131,153 +131,153 @@ namespace torch::jit::tensorexpr {

 c10::ArrayRef<SymbolAddress> getIntrinsicSymbols() {
   static SymbolAddress symbolAddresses[] = {
       {"log10f", reinterpret_cast<void*>(&log10f)},
       {"log1pf", reinterpret_cast<void*>(&log1pf)},
       {"logf", reinterpret_cast<void*>(&logf)},
       {"log2f", reinterpret_cast<void*>(&log2f)},
       {"expf", reinterpret_cast<void*>(&expf)},
       {"erff", reinterpret_cast<void*>(&erff)},
       {"cosf", reinterpret_cast<void*>(&cosf)},
       {"sinf", reinterpret_cast<void*>(&sinf)},
       {"tanf", reinterpret_cast<void*>(&tanf)},
       {"acosf", reinterpret_cast<void*>(&acosf)},
       {"asinf", reinterpret_cast<void*>(&asinf)},
       {"atanf", reinterpret_cast<void*>(&atanf)},
       {"coshf", reinterpret_cast<void*>(&coshf)},
       {"sinhf", reinterpret_cast<void*>(&sinhf)},
       {"tanhf", reinterpret_cast<void*>(&tanhf)},
       {"sqrtf", reinterpret_cast<void*>(&sqrtf)},
       {"fabsf", reinterpret_cast<void*>(&fabsf)},
       {"floorf", reinterpret_cast<void*>(&floorf)},
       {"ceilf", reinterpret_cast<void*>(&ceilf)},
       {"roundf", reinterpret_cast<void*>(&roundf)},
       {"truncf", reinterpret_cast<void*>(&truncf)},
       {"atan2f", reinterpret_cast<void*>(&atan2f)},
       {"fmodf", reinterpret_cast<void*>(&fmodf)},
       {"remainderf", reinterpret_cast<void*>(&remainderf)},

       // float -> half & half -> float conversions
       {"__gnu_h2f_ieee",
        reinterpret_cast<void*>(&c10::detail::fp16_ieee_to_fp32_value)},
       {"__gnu_f2h_ieee",
        reinterpret_cast<void*>(&c10::detail::fp16_ieee_from_fp32_value)},

 #if !defined(_MSC_VER) && defined(__x86_64__)
       // FP32 Sleef functions -- SSE
       {"Sleef_acosf4", reinterpret_cast<void*>(&Sleef_acosf4_u10)},
       {"Sleef_asinf4", reinterpret_cast<void*>(&Sleef_asinf4_u10)},
       {"Sleef_atanf4", reinterpret_cast<void*>(&Sleef_atanf4_u10)},
       {"Sleef_cosf4", reinterpret_cast<void*>(&Sleef_cosf4_u10)},
       {"Sleef_sinf4", reinterpret_cast<void*>(&Sleef_sinf4_u10)},
       {"Sleef_tanf4", reinterpret_cast<void*>(&Sleef_tanf4_u10)},
       {"Sleef_coshf4", reinterpret_cast<void*>(&Sleef_coshf4_u10)},
       {"Sleef_sinhf4", reinterpret_cast<void*>(&Sleef_sinhf4_u10)},
       {"Sleef_tanhf4", reinterpret_cast<void*>(&Sleef_tanhf4_u10)},
       {"Sleef_erff4", reinterpret_cast<void*>(&Sleef_erff4_u10)},
       {"Sleef_erfcf4", reinterpret_cast<void*>(&Sleef_erfcf4_u15)},
       {"Sleef_expf4", reinterpret_cast<void*>(&Sleef_expf4_u10)},
       {"Sleef_expm1f4", reinterpret_cast<void*>(&Sleef_expm1f4_u10)},
       {"Sleef_logf4", reinterpret_cast<void*>(&Sleef_logf4_u10)},
       {"Sleef_log2f4", reinterpret_cast<void*>(&Sleef_log2f4_u10)},
       {"Sleef_log10f4", reinterpret_cast<void*>(&Sleef_log10f4_u10)},
       {"Sleef_log1pf4", reinterpret_cast<void*>(&Sleef_log1pf4_u10)},
       {"Sleef_sqrtf4", reinterpret_cast<void*>(&Sleef_sqrtf4_u05)},
       {"Sleef_fabsf4", reinterpret_cast<void*>(&Sleef_fabsf4)},
       {"Sleef_floorf4", reinterpret_cast<void*>(&Sleef_floorf4)},
       {"Sleef_ceilf4", reinterpret_cast<void*>(&Sleef_ceilf4)},
       {"Sleef_truncf4", reinterpret_cast<void*>(&Sleef_truncf4)},
       {"Sleef_roundf4", reinterpret_cast<void*>(&Sleef_roundf4)},
       {"Sleef_lgammaf4", reinterpret_cast<void*>(&Sleef_lgammaf4_u10)},
       {"Sleef_atan2f4", reinterpret_cast<void*>(&Sleef_atan2f4_u10)},
       {"Sleef_powf4", reinterpret_cast<void*>(&Sleef_powf4_u10)},
       {"Sleef_fmodf4", reinterpret_cast<void*>(&Sleef_fmodf4)},

       // FP32 Sleef functions -- AVX2
       {"Sleef_acosf8", reinterpret_cast<void*>(&Sleef_acosf8_u10)},
       {"Sleef_asinf8", reinterpret_cast<void*>(&Sleef_asinf8_u10)},
       {"Sleef_atanf8", reinterpret_cast<void*>(&Sleef_atanf8_u10)},
       {"Sleef_cosf8", reinterpret_cast<void*>(&Sleef_cosf8_u10)},
       {"Sleef_sinf8", reinterpret_cast<void*>(&Sleef_sinf8_u10)},
       {"Sleef_tanf8", reinterpret_cast<void*>(&Sleef_tanf8_u10)},
       {"Sleef_coshf8", reinterpret_cast<void*>(&Sleef_coshf8_u10)},
       {"Sleef_sinhf8", reinterpret_cast<void*>(&Sleef_sinhf8_u10)},
       {"Sleef_tanhf8", reinterpret_cast<void*>(&Sleef_tanhf8_u10)},
       {"Sleef_erff8", reinterpret_cast<void*>(&Sleef_erff8_u10)},
       {"Sleef_erfcf8", reinterpret_cast<void*>(&Sleef_erfcf8_u15)},
       {"Sleef_expf8", reinterpret_cast<void*>(&Sleef_expf8_u10)},
       {"Sleef_expm1f8", reinterpret_cast<void*>(&Sleef_expm1f8_u10)},
       {"Sleef_logf8", reinterpret_cast<void*>(&Sleef_logf8_u10)},
       {"Sleef_log2f8", reinterpret_cast<void*>(&Sleef_log2f8_u10)},
       {"Sleef_log10f8", reinterpret_cast<void*>(&Sleef_log10f8_u10)},
       {"Sleef_log1pf8", reinterpret_cast<void*>(&Sleef_log1pf8_u10)},
       {"Sleef_sqrtf8", reinterpret_cast<void*>(&Sleef_sqrtf8_u05)},
       {"Sleef_fabsf8", reinterpret_cast<void*>(&Sleef_fabsf8)},
       {"Sleef_floorf8", reinterpret_cast<void*>(&Sleef_floorf8)},
       {"Sleef_ceilf8", reinterpret_cast<void*>(&Sleef_ceilf8)},
       {"Sleef_truncf8", reinterpret_cast<void*>(&Sleef_truncf8)},
       {"Sleef_roundf8", reinterpret_cast<void*>(&Sleef_roundf8)},
       {"Sleef_lgammaf8", reinterpret_cast<void*>(&Sleef_lgammaf8_u10)},
       {"Sleef_atan2f8", reinterpret_cast<void*>(&Sleef_atan2f8_u10)},
       {"Sleef_powf8", reinterpret_cast<void*>(&Sleef_powf8_u10)},
       {"Sleef_fmodf8", reinterpret_cast<void*>(&Sleef_fmodf8)},

       // FP64 Sleef functions -- SSE
       {"Sleef_acosd2", reinterpret_cast<void*>(&Sleef_acosd2_u10)},
       {"Sleef_asind2", reinterpret_cast<void*>(&Sleef_asind2_u10)},
       {"Sleef_atand2", reinterpret_cast<void*>(&Sleef_atand2_u10)},
       {"Sleef_cosd2", reinterpret_cast<void*>(&Sleef_cosd2_u10)},
       {"Sleef_sind2", reinterpret_cast<void*>(&Sleef_sind2_u10)},
       {"Sleef_tand2", reinterpret_cast<void*>(&Sleef_tand2_u10)},
       {"Sleef_coshd2", reinterpret_cast<void*>(&Sleef_coshd2_u10)},
       {"Sleef_sinhd2", reinterpret_cast<void*>(&Sleef_sinhd2_u10)},
       {"Sleef_tanhd2", reinterpret_cast<void*>(&Sleef_tanhd2_u10)},
       {"Sleef_erfd2", reinterpret_cast<void*>(&Sleef_erfd2_u10)},
       {"Sleef_erfcd2", reinterpret_cast<void*>(&Sleef_erfcd2_u15)},
       {"Sleef_expd2", reinterpret_cast<void*>(&Sleef_expd2_u10)},
       {"Sleef_expm1d2", reinterpret_cast<void*>(&Sleef_expm1d2_u10)},
       {"Sleef_logd2", reinterpret_cast<void*>(&Sleef_logd2_u10)},
       {"Sleef_log2d2", reinterpret_cast<void*>(&Sleef_log2d2_u10)},
       {"Sleef_log10d2", reinterpret_cast<void*>(&Sleef_log10d2_u10)},
       {"Sleef_log1pd2", reinterpret_cast<void*>(&Sleef_log1pd2_u10)},
       {"Sleef_sqrtd2", reinterpret_cast<void*>(&Sleef_sqrtd2_u05)},
       {"Sleef_fabsd2", reinterpret_cast<void*>(&Sleef_fabsd2)},
       {"Sleef_floord2", reinterpret_cast<void*>(&Sleef_floord2)},
       {"Sleef_ceild2", reinterpret_cast<void*>(&Sleef_ceild2)},
       {"Sleef_truncd2", reinterpret_cast<void*>(&Sleef_truncd2)},
       {"Sleef_roundd2", reinterpret_cast<void*>(&Sleef_roundd2)},
       {"Sleef_lgammad2", reinterpret_cast<void*>(&Sleef_lgammad2_u10)},
       {"Sleef_atan2d2", reinterpret_cast<void*>(&Sleef_atan2d2_u10)},
       {"Sleef_powd2", reinterpret_cast<void*>(&Sleef_powd2_u10)},
       {"Sleef_fmodd2", reinterpret_cast<void*>(&Sleef_fmodd2)},

       // FP64 Sleef functions -- AVX2
       {"Sleef_acosd4", reinterpret_cast<void*>(&Sleef_acosd4_u10)},
       {"Sleef_asind4", reinterpret_cast<void*>(&Sleef_asind4_u10)},
       {"Sleef_atand4", reinterpret_cast<void*>(&Sleef_atand4_u10)},
       {"Sleef_cosd4", reinterpret_cast<void*>(&Sleef_cosd4_u10)},
       {"Sleef_sind4", reinterpret_cast<void*>(&Sleef_sind4_u10)},
       {"Sleef_tand4", reinterpret_cast<void*>(&Sleef_tand4_u10)},
       {"Sleef_coshd4", reinterpret_cast<void*>(&Sleef_coshd4_u10)},
       {"Sleef_sinhd4", reinterpret_cast<void*>(&Sleef_sinhd4_u10)},
       {"Sleef_tanhd4", reinterpret_cast<void*>(&Sleef_tanhd4_u10)},
       {"Sleef_erfd4", reinterpret_cast<void*>(&Sleef_erfd4_u10)},
       {"Sleef_erfcd4", reinterpret_cast<void*>(&Sleef_erfcd4_u15)},
       {"Sleef_expd4", reinterpret_cast<void*>(&Sleef_expd4_u10)},
       {"Sleef_expm1d4", reinterpret_cast<void*>(&Sleef_expm1d4_u10)},
       {"Sleef_logd4", reinterpret_cast<void*>(&Sleef_logd4_u10)},
       {"Sleef_log2d4", reinterpret_cast<void*>(&Sleef_log2d4_u10)},
       {"Sleef_log10d4", reinterpret_cast<void*>(&Sleef_log10d4_u10)},
       {"Sleef_log1pd4", reinterpret_cast<void*>(&Sleef_log1pd4_u10)},
       {"Sleef_sqrtd4", reinterpret_cast<void*>(&Sleef_sqrtd4_u05)},
       {"Sleef_fabsd4", reinterpret_cast<void*>(&Sleef_fabsd4)},
       {"Sleef_floord4", reinterpret_cast<void*>(&Sleef_floord4)},
       {"Sleef_ceild4", reinterpret_cast<void*>(&Sleef_ceild4)},
       {"Sleef_truncd4", reinterpret_cast<void*>(&Sleef_truncd4)},
       {"Sleef_roundd4", reinterpret_cast<void*>(&Sleef_roundd4)},
       {"Sleef_lgammad4", reinterpret_cast<void*>(&Sleef_lgammad4_u10)},
       {"Sleef_atan2d4", reinterpret_cast<void*>(&Sleef_atan2d4_u10)},
       {"Sleef_powd4", reinterpret_cast<void*>(&Sleef_powd4_u10)},
       {"Sleef_fmodd4", reinterpret_cast<void*>(&Sleef_fmodd4)},
 #endif
   };
   return c10::ArrayRef<SymbolAddress>(symbolAddresses);
@@ -36,12 +36,11 @@ enum class C10_API_ENUM Aggregation {
   MIN = 6,
 };

-struct TORCH_API AggregationHash {
-  template <typename T>
-  std::size_t operator()(T t) const {
-    return static_cast<std::size_t>(t);
-  }
-};
+struct TORCH_API AggregationHash{template <typename T> std::size_t operator()(
+    T t) const {return static_cast<std::size_t>(t);
+} // namespace torch::monitor
+}
+;

 // aggregationName returns the human readable name corresponding to the
 // aggregation.
@@ -808,56 +808,58 @@ void generateForwardBackwardLink(

 void generateForwardBackwardLinks(
     std::unique_ptr<torch::profiler::impl::kineto::trace_t>& cpu_trace,
-    const std::vector<std::shared_ptr<Result>>& results){
+    const std::vector<std::shared_ptr<Result>>& results) {
 #ifndef USE_KINETO
 }
 #else // USE_KINETO
   TORCH_INTERNAL_ASSERT(cpu_trace->activities.size() == results.size());

   // startThreadId_seqNum to pointer of activity.
   // Low-16bits of startThreadId and low-48bits seqNum are concatenated into
   // one uint64_t variable as key.

-  std::unordered_map<uint64_t, libkineto::GenericTraceActivity*> tidSeq2activity;
+  std::unordered_map<uint64_t, libkineto::GenericTraceActivity*>
+      tidSeq2activity;
   uint64_t fwd_bwd_link_id = 1;

-  using result_activity_t = std::pair<Result*, libkineto::GenericTraceActivity*>;
+  using result_activity_t =
+      std::pair<Result*, libkineto::GenericTraceActivity*>;
   std::vector<result_activity_t> torch_events;

   for (const auto idx : c10::irange(cpu_trace->activities.size())) {
     auto& profiler_result = results[idx];
     auto& activity = cpu_trace->activities[idx];

     // add information about an associated forward op, if a sequence number
     // is available (e.g. during training)

     profiler_result->visit_if_base<ExtraFields<EventType::TorchOp>>(
         [&](const auto& e) {
           if (e.sequence_number_ >= 0) {
             torch_events.emplace_back(profiler_result.get(), activity.get());
           }
         });
   }

   // We need to visit the events in chronological order.
   // So we sort them by end_time_ns_ before processing.
   std::sort(
       torch_events.begin(),
       torch_events.end(),
       [](const result_activity_t& left, const result_activity_t& right) {
         auto left_end_time =
             std::get<ExtraFields<EventType::TorchOp>>(left.first->extra_fields_)
                 .end_time_ns_;
-        auto right_end_time =
-            std::get<ExtraFields<EventType::TorchOp>>(right.first->extra_fields_)
-                .end_time_ns_;
+        auto right_end_time = std::get<ExtraFields<EventType::TorchOp>>(
+                                  right.first->extra_fields_)
+                                  .end_time_ns_;
         return left_end_time < right_end_time;
       });

   for (auto& [profiler_result, activity] : torch_events) {
     generateForwardBackwardLink(
         *profiler_result, fwd_bwd_link_id, *activity, tidSeq2activity);
   }
 }
 #endif // USE_KINETO
@@ -63,7 +63,7 @@ void PerfEvent::Init() {
     TORCH_CHECK(false, "Unsupported profiler event name: ", name_);
   }

-  struct perf_event_attr attr {};
+  struct perf_event_attr attr{};

   attr.size = sizeof(perf_event_attr);
   attr.type = it->second.first;
@@ -673,8 +673,8 @@ void initPythonBindings(PyObject* module) {
       {nullptr},
   };

-  static PyTypeObject RecordFunctionFast_Type = { PyVarObject_HEAD_INIT(nullptr,
-                                                  0)
+  static PyTypeObject RecordFunctionFast_Type = {
+      PyVarObject_HEAD_INIT(nullptr, 0)
   };

   RecordFunctionFast_Type.tp_name = "torch._C._profiler.RecordFunctionFast",
@@ -46,7 +46,7 @@ struct MemFile {
         "failed to open {}: {}",
         filename_,
         c10::utils::str_error(errno));
-    struct stat s {};
+    struct stat s{};
     if (-1 == fstat(fd_, &s)) {
       close(fd_); // destructors don't run during exceptions
       UNWIND_CHECK(
@@ -101,11 +101,10 @@
 #define THPBoolUtils_newReal(value) THPUtils_newReal_BOOL(value)
 #define THPBoolUtils_checkAccreal(object) THPUtils_checkReal_BOOL(object)
 #define THPBoolUtils_unpackAccreal(object) \
-  (int64_t) THPUtils_unpackReal_BOOL(object)
+  (int64_t)THPUtils_unpackReal_BOOL(object)
 #define THPBoolUtils_newAccreal(value) THPUtils_newReal_BOOL(value)
 #define THPLongUtils_checkReal(object) THPUtils_checkReal_INT(object)
-#define THPLongUtils_unpackReal(object) \
-  (int64_t) THPUtils_unpackReal_INT(object)
+#define THPLongUtils_unpackReal(object) (int64_t)THPUtils_unpackReal_INT(object)
 #define THPLongUtils_newReal(value) THPUtils_newReal_INT(value)
 #define THPIntUtils_checkReal(object) THPUtils_checkReal_INT(object)
 #define THPIntUtils_unpackReal(object) (int)THPUtils_unpackReal_INT(object)
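Here the space after a C-style cast inside a macro replacement list is dropped, and a short two-line macro is folded back onto its #define line. A tiny compilable sketch of the same cast spacing, with a hypothetical macro name standing in for the THPUtils helpers:

    #include <cstdint>

    // Stand-in for the unpack macros above; not part of the PyTorch headers.
    #define UNPACK_AS_INT64(x) (int64_t)(x)

    int main() {
      double value = 3.5;
      // Old output: "(int64_t) THPUtils_unpackReal_INT(object)" with a space
      // after the cast; the new output writes the cast flush against its
      // operand, as shown in the hunk above.
      int64_t n = UNPACK_AS_INT64(value);
      return static_cast<int>(n);
    }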
@@ -362,7 +362,7 @@ TORCH_API void THP_encodeBuffer<c10::complex<double>>(

 #define DEFINE_ENCODE(TYPE) \
   template TORCH_API void THP_encodeBuffer<TYPE>( \
-      uint8_t * dst, const TYPE* src, THPByteOrder order, size_t len);
+      uint8_t* dst, const TYPE* src, THPByteOrder order, size_t len);

 DEFINE_ENCODE(int16_t)
 DEFINE_ENCODE(int32_t)