[Lint] Update clang-format to 19.1.4 (#153889)

All changes other than the one to `tools/linter/adapters/s3_init_config.json` are generated by newer clang-format
Pull Request resolved: https://github.com/pytorch/pytorch/pull/153889
Approved by: https://github.com/cyyever, https://github.com/atalman
Author: Nikita Shulga
Date: 2025-05-19 15:36:11 -07:00
Committed by: PyTorch MergeBot
Parent: d869ea11e0
Commit: c4d1ff02f8
59 changed files with 550 additions and 522 deletions
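
Every formatting change below is mechanical. A minimal sketch, in Python, of how such a tree-wide reformat can be regenerated — the binary path and file globs here are assumptions for illustration, not the actual PyTorch lint tooling:

import subprocess

# Hypothetical location of the pinned clang-format 19.1.4 binary; the
# PyTorch linter fetches it using the s3_init_config.json entries below.
CLANG_FORMAT = ".lintbin/clang-format"

# Collect tracked C++/CUDA/ObjC++/Metal sources via git pathspec globs.
files = subprocess.run(
    ["git", "ls-files", "*.cpp", "*.h", "*.cu", "*.mm", "*.metal"],
    capture_output=True, text=True, check=True,
).stdout.splitlines()

# clang-format -i rewrites each file in place using the repo's
# .clang-format configuration.
for path in files:
    subprocess.run([CLANG_FORMAT, "-i", path], check=True)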


@@ -395,8 +395,7 @@ class Vectorized<double> {
   })} // Comparison using the _CMP_**_OQ predicate.
   // `O`: get false if an operand is NaN
   // `Q`: do not raise if an operand is NaN
-  Vectorized<double>
-  operator==(const Vectorized<double>& other) const {
+  Vectorized<double> operator==(const Vectorized<double>& other) const {
     svbool_t mask = svcmpeq_f64(ptrue, values, other);
     return svsel_f64(mask, ALL_F64_TRUE_MASK, ALL_F64_FALSE_MASK);
   }


@@ -497,8 +497,7 @@ class Vectorized<float> {
   })} // Comparison using the _CMP_**_OQ predicate.
   // `O`: get false if an operand is NaN
   // `Q`: do not raise if an operand is NaN
-  Vectorized<float>
-  operator==(const Vectorized<float>& other) const {
+  Vectorized<float> operator==(const Vectorized<float>& other) const {
     svbool_t mask = svcmpeq_f32(ptrue, values, other);
     return svsel_f32(mask, ALL_F32_TRUE_MASK, ALL_F32_FALSE_MASK);
   }


@@ -97,14 +97,14 @@ class Vectorized<float> {
       const Vectorized<float>& a,
       const Vectorized<float>& b) {
     Vectorized<float> vec;
-    vec.values =
-        BlendRegs<0, (mask & 0x01) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<1, (mask & 0x02) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<2, (mask & 0x04) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<3, (mask & 0x08) != 0>::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 0,
+    (mask & 0x01) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 1,
+    (mask & 0x02) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 2,
+    (mask & 0x04) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 3,
+    (mask & 0x08) != 0 > ::impl(a.values, b.values, vec.values);
     return vec;
   }
   static Vectorized<float> blendv(


@@ -13,8 +13,7 @@ inline namespace CPU_CAPABILITY {
 template <
     typename VecT,
     typename ValueT,
-    template <int, bool>
-    typename BlendRegs,
+    template <int, bool> typename BlendRegs,
     typename Derived>
 struct Vectorized16 {
  protected:
@@ -54,23 +53,23 @@ struct Vectorized16 {
   template <int64_t mask>
   static Derived blend(const Derived& a, const Derived& b) {
     Derived vec;
-    vec.values =
-        BlendRegs<0, (mask & 0x01) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<1, (mask & 0x02) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<2, (mask & 0x04) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<3, (mask & 0x08) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<4, (mask & 0x10) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<5, (mask & 0x20) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<6, (mask & 0x40) != 0>::impl(a.values, b.values, vec.values);
-    vec.values =
-        BlendRegs<7, (mask & 0x80) != 0>::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 0,
+    (mask & 0x01) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 1,
+    (mask & 0x02) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 2,
+    (mask & 0x04) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 3,
+    (mask & 0x08) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 4,
+    (mask & 0x10) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 5,
+    (mask & 0x20) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 6,
+    (mask & 0x40) != 0 > ::impl(a.values, b.values, vec.values);
+    vec.values = BlendRegs < 7,
+    (mask & 0x80) != 0 > ::impl(a.values, b.values, vec.values);
     return vec;
   }


@@ -780,8 +780,8 @@ void MPSProfiler::handleIntSignal(int signal) {
 }
 // used to capture sigint signal to log profiling stats
-struct sigaction MPSProfiler::currentSigint {};
-struct sigaction MPSProfiler::previousSigint {};
+struct sigaction MPSProfiler::currentSigint{};
+struct sigaction MPSProfiler::previousSigint{};
 bool MPSProfiler::isCapturing() const {
   return [captureManager isCapturing];


@@ -216,7 +216,7 @@ struct RNNDescriptorParams {
       cudnnDataType_t datatype,
       cudnnDataType_t input_datatype) {
 #endif
-    this->set_mode(mode);
+    this -> set_mode(mode);
 #ifdef USE_CUDNN_RNN_V8_API
     this->input_size = input_size;
     this->packed = packed;


@@ -153,12 +153,12 @@ kernel void searchsorted(
       constant INPUT_T * data_in [[buffer(0)]], \
       constant INPUT_T * data_bd [[buffer(1)]], \
       device OUTPUT_T * data_out [[buffer(2)]], \
-      constant int64_t & idim_in [[buffer(3)]], \
-      constant int64_t & idim_bd [[buffer(4)]], \
-      constant int64_t & numel_in [[buffer(5)]], \
-      constant int64_t & right [[buffer(6)]], \
-      constant int64_t & is_1d_boundaries [[buffer(7)]], \
-      constant int64_t * data_sort [[buffer(8)]], \
+      constant int64_t& idim_in [[buffer(3)]], \
+      constant int64_t& idim_bd [[buffer(4)]], \
+      constant int64_t& numel_in [[buffer(5)]], \
+      constant int64_t& right [[buffer(6)]], \
+      constant int64_t& is_1d_boundaries [[buffer(7)]], \
+      constant int64_t* data_sort [[buffer(8)]], \
       uint2 tgid [[threadgroup_position_in_grid]], \
       uint2 tid2 [[thread_position_in_threadgroup]], \
       uint2 tptg [[threads_per_threadgroup]]); \
@@ -167,11 +167,11 @@ kernel void searchsorted(
       constant INPUT_T * data_in [[buffer(0)]], \
       constant INPUT_T * data_bd [[buffer(1)]], \
       device OUTPUT_T * data_out [[buffer(2)]], \
-      constant int64_t & idim_in [[buffer(3)]], \
-      constant int64_t & idim_bd [[buffer(4)]], \
-      constant int64_t & numel_in [[buffer(5)]], \
-      constant int64_t & right [[buffer(6)]], \
-      constant int64_t & is_1d_boundaries [[buffer(7)]], \
+      constant int64_t& idim_in [[buffer(3)]], \
+      constant int64_t& idim_bd [[buffer(4)]], \
+      constant int64_t& numel_in [[buffer(5)]], \
+      constant int64_t& right [[buffer(6)]], \
+      constant int64_t& is_1d_boundaries [[buffer(7)]], \
       uint2 tgid [[threadgroup_position_in_grid]], \
       uint2 tid2 [[thread_position_in_threadgroup]], \
       uint2 tptg [[threads_per_threadgroup]]);


@@ -94,21 +94,21 @@ kernel void histogramdd(
   }
 }
 #define REGISTER_HISTOGRAMDD_OP(DTYPE) \
   template [[host_name("histogramdd_" #DTYPE)]] kernel void \
   histogramdd<DTYPE>( \
       constant DTYPE * input_ [[buffer(0)]], \
       constant DTYPE * weight [[buffer(1)]], \
       device DTYPE * local_out [[buffer(2)]], \
       constant uint * offsets [[buffer(3)]], \
-      constant size_t & num_dims [[buffer(4)]], \
-      constant DTYPE * bin_seq [[buffer(5)]], \
-      constant int64_t * num_bin_edges [[buffer(6)]], \
-      constant DTYPE * leftmost_edge [[buffer(7)]], \
-      constant DTYPE * rightmost_edge [[buffer(8)]], \
-      constant int64_t * local_out_strides [[buffer(9)]], \
-      constant uint8_t & bin_selection_algorithm [[buffer(10)]], \
-      constant uint8_t & has_weight [[buffer(11)]], \
+      constant size_t& num_dims [[buffer(4)]], \
+      constant DTYPE* bin_seq [[buffer(5)]], \
+      constant int64_t* num_bin_edges [[buffer(6)]], \
+      constant DTYPE* leftmost_edge [[buffer(7)]], \
+      constant DTYPE* rightmost_edge [[buffer(8)]], \
+      constant int64_t* local_out_strides [[buffer(9)]], \
+      constant uint8_t& bin_selection_algorithm [[buffer(10)]], \
+      constant uint8_t& has_weight [[buffer(11)]], \
       uint tid [[thread_position_in_grid]]);
 REGISTER_HISTOGRAMDD_OP(float);


@@ -209,11 +209,11 @@ kernel void triu_indices(
   template [[host_name(#NAME "_indices_" #DTYPE)]] kernel void \
   NAME##_indices<DTYPE>( \
       device DTYPE * tensor, \
-      constant int64_t & col_offset, \
-      constant int64_t & m_first_row, \
-      constant int64_t & col, \
-      constant int64_t & rectangle_size, \
-      constant int64_t & triu_size, \
+      constant int64_t& col_offset, \
+      constant int64_t& m_first_row, \
+      constant int64_t& col, \
+      constant int64_t& rectangle_size, \
+      constant int64_t& triu_size, \
       uint linear_index [[thread_position_in_grid]])
 INSTANTIATE_TRI_INDICES(triu, long);


@@ -88,15 +88,13 @@ Tensor unsqueeze(const at::Tensor& self, int64_t dim) {
   }
   // Create the params buffer
-  struct Block block {
-    {
+  struct Block block{{
       // Dimension to unsqueeze
       static_cast<int32_t>(dim),
       // Keep track of the channel in Image3D
       static_cast<int32_t>(
           std::ceil(static_cast<float>(output_size[channel_index]) / 4)),
-    }
-  };
+  }};
   api::UniformParamsBuffer params(context, block);


@@ -193,11 +193,11 @@ struct C10_API AutogradMetaFactory {
 C10_API void SetAutogradMetaFactory(AutogradMetaFactory* factory);
 C10_API AutogradMetaFactory* GetAutogradMetaFactory();
-struct C10_API AutogradMetaFactoryRegisterer {
-  explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory* factory) {
+struct C10_API AutogradMetaFactoryRegisterer{
+  explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory * factory){
     SetAutogradMetaFactory(factory);
-  }
-};
+  } // namespace impl
+}; // namespace c10
 } // namespace impl


@@ -286,7 +286,7 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
 #define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \
   ((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \
         ? (blocks_per_sm) \
-        : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block)-1) / \
+        : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / \
            (threads_per_block))))
 // C10_LAUNCH_BOUNDS is analogous to __launch_bounds__
 #define C10_LAUNCH_BOUNDS_0 \


@@ -68,8 +68,7 @@ static_assert(
 } // namespace test_function_traits
 struct MovableOnly {
-  constexpr MovableOnly(int val_) : val(val_) { /* no default constructor */
-  }
+  constexpr MovableOnly(int val_) : val(val_) { /* no default constructor */ }
   MovableOnly(const MovableOnly&) = delete;
   MovableOnly(MovableOnly&&) = default;
   MovableOnly& operator=(const MovableOnly&) = delete;


@ -5,20 +5,20 @@
#if !defined(_WIN32) #if !defined(_WIN32)
static bool file_exists(const char* path) { static bool file_exists(const char* path) {
struct stat st {}; struct stat st{};
return stat(path, &st) == 0 && S_ISREG(st.st_mode); return stat(path, &st) == 0 && S_ISREG(st.st_mode);
} }
static bool directory_exists(const char* path) { static bool directory_exists(const char* path) {
struct stat st {}; struct stat st{};
return stat(path, &st) == 0 && S_ISDIR(st.st_mode); return stat(path, &st) == 0 && S_ISDIR(st.st_mode);
} }
#else #else
static bool file_exists(const char* path) { static bool file_exists(const char* path) {
struct _stat st {}; struct _stat st{};
return _stat(path, &st) == 0 && ((st.st_mode & _S_IFMT) == _S_IFREG); return _stat(path, &st) == 0 && ((st.st_mode & _S_IFMT) == _S_IFREG);
} }
static bool directory_exists(const char* path) { static bool directory_exists(const char* path) {
struct _stat st {}; struct _stat st{};
return _stat(path, &st) == 0 && ((st.st_mode & _S_IFMT) == _S_IFDIR); return _stat(path, &st) == 0 && ((st.st_mode & _S_IFMT) == _S_IFDIR);
} }
#endif // !defined(_WIN32) #endif // !defined(_WIN32)


@@ -59,7 +59,7 @@ inline time_t getTime(bool allow_monotonic = false) {
       .count();
 #else
   // clock_gettime is *much* faster than std::chrono implementation on Linux
-  struct timespec t {};
+  struct timespec t{};
   auto mode = CLOCK_REALTIME;
   if (allow_monotonic) {
     mode = CLOCK_MONOTONIC;


@@ -116,8 +116,8 @@ class C10_API Error : public std::exception {
 class C10_API Warning {
  public:
-  class C10_API UserWarning {};
-  class C10_API DeprecationWarning {};
+  class C10_API UserWarning{};
+  class C10_API DeprecationWarning{};
   using warning_variant_t = std::variant<UserWarning, DeprecationWarning>;


@@ -70,7 +70,9 @@ class LeftRight final {
   ~LeftRight() {
     // wait until any potentially running writers are finished
-    { std::unique_lock<std::mutex> lock(_writeMutex); }
+    {
+      std::unique_lock<std::mutex> lock(_writeMutex);
+    }
     // wait until any potentially running readers are finished
     while (_counters[0].load() != 0 || _counters[1].load() != 0) {


@@ -370,9 +370,9 @@ class SmallVectorTemplateCommon
 /// note
 template <
     typename T,
-    bool = (std::is_trivially_copy_constructible_v<T>)&&(
-        std::is_trivially_move_constructible_v<
-            T>)&&std::is_trivially_destructible_v<T>>
+    bool = (std::is_trivially_copy_constructible_v<T>) &&
+        (std::is_trivially_move_constructible_v<T>) &&
+        std::is_trivially_destructible_v<T>>
 class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
   friend class SmallVectorTemplateCommon<T>;


@@ -59,7 +59,7 @@ void hookupHandler() {
   if (hookedUpCount++) {
     return;
   }
-  struct sigaction sa {};
+  struct sigaction sa{};
   // Setup the handler
   sa.sa_handler = &handleSignal;
   // Restart the system call, if at all possible
@@ -80,7 +80,7 @@ void unhookHandler() {
   if (--hookedUpCount > 0) {
     return;
   }
-  struct sigaction sa {};
+  struct sigaction sa{};
   // Setup the sighub handler
   sa.sa_handler = SIG_DFL;
   // Restart the system call, if at all possible
@@ -273,7 +273,7 @@ void FatalSignalHandler::installFatalSignalHandlers() {
     return;
   }
   fatalSignalHandlersInstalled = true;
-  struct sigaction sa {};
+  struct sigaction sa{};
   sigemptyset(&sa.sa_mask);
   // Since we'll be in an exiting situation it's possible there's memory
   // corruption, so make our own stack just in case.


@@ -88,7 +88,7 @@ class C10_API FatalSignalHandler {
   bool fatalSignalHandlersInstalled;
   // We need to hold a reference to call the previous SIGUSR2 handler in case
   // we didn't signal it
-  struct sigaction previousSigusr2 {};
+  struct sigaction previousSigusr2{};
   // Flag dictating whether the SIGUSR2 handler falls back to previous handlers
   // or is intercepted in order to print a stack trace.
   std::atomic<bool> fatalSignalReceived;


@@ -87,7 +87,7 @@ struct DummyChunkDataReader : public datasets::ChunkDataReader<int> {
     return chunk_count_;
   };
-  void reset() override{};
+  void reset() override {};
   const static size_t chunk_count_ = 3;
   // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,cppcoreguidelines-avoid-c-arrays)
@@ -1479,7 +1479,7 @@ TEST(DataLoaderTest, StatefulDatasetWithNoWorkers) {
     void reset() override {
       counter = 0;
     }
-    void save(torch::serialize::OutputArchive& archive) const override{};
+    void save(torch::serialize::OutputArchive& archive) const override {};
     void load(torch::serialize::InputArchive& archive) override {}
     int counter = 0;
   };
@@ -1517,7 +1517,7 @@ TEST(DataLoaderTest, StatefulDatasetWithManyWorkers) {
     void reset() override {
      counter = 0;
     }
-    void save(torch::serialize::OutputArchive& archive) const override{};
+    void save(torch::serialize::OutputArchive& archive) const override {};
     void load(torch::serialize::InputArchive& archive) override {}
     int counter = 0;
     std::mutex mutex;
@@ -1556,7 +1556,7 @@ TEST(DataLoaderTest, StatefulDatasetWithMap) {
     void reset() override {
      counter = 0;
     }
-    void save(torch::serialize::OutputArchive& archive) const override{};
+    void save(torch::serialize::OutputArchive& archive) const override {};
     void load(torch::serialize::InputArchive& archive) override {}
     int counter = 0;
   };
@@ -1605,7 +1605,7 @@ TEST(DataLoaderTest, StatefulDatasetWithCollate) {
     void reset() override {
      counter = 0;
     }
-    void save(torch::serialize::OutputArchive& archive) const override{};
+    void save(torch::serialize::OutputArchive& archive) const override {};
     void load(torch::serialize::InputArchive& archive) override {}
     int counter = 0;
   };
@@ -1747,7 +1747,7 @@ TEST(DataLoaderTest, ChunkDataSetWithEmptyBatch) {
       return 1;
     };
-    void reset() override{};
+    void reset() override {};
   };
   const size_t prefetch_count = 1;
@@ -1791,7 +1791,7 @@ TEST(DataLoaderTest, ChunkDataSetGetBatchWithUnevenBatchSize) {
      return 2;
     };
-    void reset() override{};
+    void reset() override {};
   };
   // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
@@ -1936,7 +1936,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) {
      return chunk_count_;
     };
-    void reset() override{};
+    void reset() override {};
     BatchType batch_data_ = BatchType(chunk_size, 0);
   };
@@ -2115,7 +2115,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
   class S : public samplers::Sampler<> {
    public:
-    explicit S(size_t size) : size_(size), index_(0){};
+    explicit S(size_t size) : size_(size), index_(0) {};
     void reset(std::optional<size_t> new_size = std::nullopt) override {
       if (new_size.has_value()) {
@@ -2170,7 +2170,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
      return chunk_count_;
     };
-    void reset() override{};
+    void reset() override {};
     size_t chunk_count_;
   };
@@ -2258,7 +2258,7 @@ TEST(DataLoaderTest, CustomPreprocessPolicy) {
      return chunk_count_;
     };
-    void reset() override{};
+    void reset() override {};
     size_t chunk_count_;
   };


@@ -127,9 +127,7 @@ TEST_F(ModulesTest, Conv2dSameStrided) {
       [&] { Conv2d model_invalid(options.stride(2)); }(),
       "padding='same' is not supported for strided convolutions");
   ASSERT_THROWS_WITH(
-      [&] {
-        Conv2d model_invalid(options.stride({1, 2}));
-      }(),
+      [&] { Conv2d model_invalid(options.stride({1, 2})); }(),
       "padding='same' is not supported for strided convolutions");
 }
@@ -181,9 +179,7 @@ TEST_F(ModulesTest, Conv3dSameStrided) {
       [&] { Conv3d model_invalid(options.stride(2)); }(),
       "padding='same' is not supported for strided convolutions");
   ASSERT_THROWS_WITH(
-      [&] {
-        Conv3d model_invalid(options.stride({1, 2, 1}));
-      }(),
+      [&] { Conv3d model_invalid(options.stride({1, 2, 1})); }(),
       "padding='same' is not supported for strided convolutions");
 }


@@ -920,7 +920,9 @@ TEST(TensorTest, Arange) {
 }
 TEST(TensorTest, PrettyPrintTensorDataContainer) {
-  { ASSERT_EQ(c10::str(torch::detail::TensorDataContainer(1.1)), "1.1"); }
+  {
+    ASSERT_EQ(c10::str(torch::detail::TensorDataContainer(1.1)), "1.1");
+  }
   {
     ASSERT_EQ(
         c10::str(torch::detail::TensorDataContainer({1.1, 2.2})), "{1.1, 2.2}");


@@ -1033,9 +1033,8 @@ TEST(ContainerAliasingTest, MovesAcrossContainedWrites) {
   auto ops = torch::RegisterOperators().op(
       "uses::list",
       torch::RegisterOperators::options()
-          .catchAllKernel([](torch::List<at::Tensor> in) {
-            return torch::rand({2, 3});
-          })
+          .catchAllKernel(
+              [](torch::List<at::Tensor> in) { return torch::rand({2, 3}); })
           .aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
   // Write to the inside of a list. Check that we can't reorder a
   // print across it.
@@ -1073,9 +1072,8 @@ TEST(ContainerAliasingTest, MovesAcrossContainedWritesNested) {
   auto ops = torch::RegisterOperators().op(
       "uses::list",
       torch::RegisterOperators::options()
-          .catchAllKernel([](torch::List<at::Tensor> in) {
-            return torch::rand({2, 3});
-          })
+          .catchAllKernel(
+              [](torch::List<at::Tensor> in) { return torch::rand({2, 3}); })
           .aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
   // Write to the inside of a list. Check that we can't reorder a
   // print across it.
@@ -1257,9 +1255,8 @@ TEST(AliasRegistrationTest, ConservativeWithInferredSchema) {
   auto registry = torch::RegisterOperators().op(
       "foo::rand1",
       torch::RegisterOperators::options()
-          .catchAllKernel([](at::Tensor) -> at::Tensor {
-            return at::rand({2, 2});
-          })
+          .catchAllKernel(
+              [](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
          .aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
   const auto rand_op = Symbol::fromQualString("foo::rand1");
   auto graph = std::make_shared<Graph>();
@@ -1274,9 +1271,8 @@ TEST(AliasRegistrationTest, ConservativeWithSpecifiedSchema) {
   auto registry = torch::RegisterOperators().op(
       "foo::rand2(Tensor arg1) -> Tensor",
       torch::RegisterOperators::options()
-          .catchAllKernel([](at::Tensor) -> at::Tensor {
-            return at::rand({2, 2});
-          })
+          .catchAllKernel(
+              [](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
          .aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
   const auto rand_op = Symbol::fromQualString("foo::rand2");
   auto graph = std::make_shared<Graph>();
@@ -1291,9 +1287,8 @@ TEST(AliasRegistrationTest, ConservativeWithAliasingAnnotationsShouldError) {
   auto registry = torch::RegisterOperators().op(
       "foo::rand3(Tensor(a) arg1) -> Tensor(b)",
       torch::RegisterOperators::options()
-          .catchAllKernel([](at::Tensor) -> at::Tensor {
-            return at::rand({2, 2});
-          })
+          .catchAllKernel(
+              [](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
          .aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
   const auto rand_op = Symbol::fromQualString("foo::rand3");
@@ -1312,9 +1307,8 @@ TEST(AliasRegistrationTest, ConservativeWithAliasingAnnotationsShouldError2) {
   auto registry = torch::RegisterOperators().op(
       "foo::rand4(Tensor(a) arg1) -> Tensor(a)",
       torch::RegisterOperators::options()
-          .catchAllKernel([](at::Tensor) -> at::Tensor {
-            return at::rand({2, 2});
-          })
+          .catchAllKernel(
+              [](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
          .aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
   const auto rand_op = Symbol::fromQualString("foo::rand4");
   auto graph = std::make_shared<Graph>();
@@ -1334,9 +1328,8 @@ TEST(AliasRegistrationTest, FromSchemaWithInferredSchemaShouldError) {
         torch::RegisterOperators().op(
             "foo::rand5",
             torch::RegisterOperators::options()
-                .catchAllKernel([](at::Tensor) -> at::Tensor {
-                  return at::rand({2, 2});
-                })
+                .catchAllKernel(
+                    [](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
                .aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA));
       },
       "Tried to register operator foo::rand5(Tensor _0) -> Tensor _0 with AliasAnalysisKind::FROM_SCHEMA, but the schema is inferred");
@@ -1346,9 +1339,8 @@ TEST(AliasRegistrationTest, FromSchemaInferredPure) {
   auto registry = torch::RegisterOperators().op(
       "foo::rand6(Tensor arg1) -> Tensor",
       torch::RegisterOperators::options()
-          .catchAllKernel([](at::Tensor) -> at::Tensor {
-            return at::rand({2, 2});
-          })
+          .catchAllKernel(
+              [](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
          .aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA));
   const auto rand_op = Symbol::fromQualString("foo::rand6");
   auto graph = std::make_shared<Graph>();
@@ -1395,9 +1387,8 @@ TEST(AliasRegistrationTest, PureNoSchema) {
   auto registry = torch::RegisterOperators().op(
       "foo::rand9",
       torch::RegisterOperators::options()
-          .catchAllKernel([](at::Tensor) -> at::Tensor {
-            return at::rand({2, 2});
-          })
+          .catchAllKernel(
+              [](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
          .aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
   const auto rand_op = Symbol::fromQualString("foo::rand9");
   auto graph = std::make_shared<Graph>();
@@ -1412,9 +1403,8 @@ TEST(AliasRegistrationTest, PureWithSchema) {
   auto registry = torch::RegisterOperators().op(
       "foo::rand10(Tensor arg1) -> Tensor",
       torch::RegisterOperators::options()
-          .catchAllKernel([](at::Tensor) -> at::Tensor {
-            return at::rand({2, 2});
-          })
+          .catchAllKernel(
+              [](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
          .aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
   const auto rand_op = Symbol::fromQualString("foo::rand10");
   auto graph = std::make_shared<Graph>();


@@ -121,14 +121,10 @@ TEST(AutodiffTest, ADFormulas) {
       {"t", unary_pointwise_2d, [](const VL& v) -> VL { return {v[0].t()}; }},
       {"view",
        unary_pointwise_2d,
-       [](const VL& v) -> VL {
-         return {v[0].view({3, 2})};
-       }},
+       [](const VL& v) -> VL { return {v[0].view({3, 2})}; }},
       {"expand",
        {{2, 1}},
-       [](const VL& v) -> VL {
-         return {v[0].expand({2, 3})};
-       }},
+       [](const VL& v) -> VL { return {v[0].expand({2, 3})}; }},
       {"mm",
        {{10, 12}, {12, 15}},
        [](const VL& v) -> VL { return {v[0].mm(v[1])}; }},


@@ -863,8 +863,12 @@ void checkScopeCallbacks() {
   {
     RECORD_TORCHSCRIPT_FUNCTION("test_method", {});
-    { RECORD_FUNCTION("test_function", {}); }
-    { RECORD_USER_SCOPE("test_user_scope"); }
+    {
+      RECORD_FUNCTION("test_function", {});
+    }
+    {
+      RECORD_USER_SCOPE("test_user_scope");
+    }
   }
   TORCH_CHECK(!bad_scope);
@@ -1057,7 +1061,9 @@ TEST(RecordFunctionTest, RecordFunctionGuard) {
         RECORD_USER_SCOPE("C");
       }
     }
-    { RECORD_USER_SCOPE("D"); }
+    {
+      RECORD_USER_SCOPE("D");
+    }
   }
 }
 TORCH_CHECK(fn_names.size() == 1);
@@ -1084,7 +1090,9 @@ TEST(RecordFunctionTest, Callbacks) {
   add_remove_test_add_cb<2>();
   auto h3 = add_remove_test_add_cb<3>();
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   TORCH_CHECK(ids.size() == 3);
   TORCH_CHECK(std::find(ids.begin(), ids.end(), 1) != ids.end());
@@ -1094,7 +1102,9 @@ TEST(RecordFunctionTest, Callbacks) {
   ids.clear();
   removeCallback(h1);
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   TORCH_CHECK(ids.size() == 2);
   TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
@@ -1103,7 +1113,9 @@ TEST(RecordFunctionTest, Callbacks) {
   ids.clear();
   removeCallback(h3);
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   TORCH_CHECK(ids.size() == 1);
   TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
@@ -1115,7 +1127,9 @@ TEST(RecordFunctionTest, Callbacks) {
   ids.clear();
   add_remove_test_add_cb<1>();
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   TORCH_CHECK(ids.size() == 1);
   TORCH_CHECK(ids[0] == 1);
@@ -1128,7 +1142,9 @@ TEST(RecordFunctionTest, Callbacks) {
       return nullptr;
     }));
-    { RECORD_USER_SCOPE("test_thread"); }
+    {
+      RECORD_USER_SCOPE("test_thread");
+    }
   });
   th.join();
   TORCH_CHECK(ids.size() == 2);
@@ -1136,7 +1152,9 @@ TEST(RecordFunctionTest, Callbacks) {
   TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
   ids.clear();
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   TORCH_CHECK(ids.size() == 1);
   TORCH_CHECK(ids[0] == 1);
@@ -1167,7 +1185,9 @@ TEST(RecordFunctionTest, Callbacks) {
     TORCH_CHECK(ctx->b == "test_str");
   }));
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   TORCH_CHECK(ids.size() == 1);
   TORCH_CHECK(ids[0] == 1);
@@ -1193,7 +1213,9 @@ TEST(RecordFunctionTest, Callbacks) {
    }));
    // Will call both global and thread local callbacks.
-    { RECORD_USER_SCOPE("test_thread"); }
+    {
+      RECORD_USER_SCOPE("test_thread");
+    }
   });
   ctx_th.join();
   TORCH_CHECK(ids.size() == 2);
@@ -1216,21 +1238,27 @@ TEST(RecordFunctionTest, ShouldRun) {
     return nullptr;
   }));
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   EXPECT_TRUE(ran) << "first run didn't happen";
   ran = false;
   disableCallback(handle);
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   EXPECT_FALSE(ran) << "second run happened but shouldn't have";
   ran = false;
   reenableCallback(handle);
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   EXPECT_TRUE(ran) << "run after re-enable didn't happen";
   ran = false;
@@ -1273,7 +1301,9 @@ TEST(RecordFunctionTest, Basic) {
        return nullptr;
       })
       .needsIds(true));
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   TORCH_CHECK(has_ids);
   clearCallbacks();
   has_ids = false;
@@ -1282,7 +1312,9 @@ TEST(RecordFunctionTest, Basic) {
     has_ids = fn.handle() > 0;
     return nullptr;
   }));
-  { RECORD_USER_SCOPE("test"); }
+  {
+    RECORD_USER_SCOPE("test");
+  }
   TORCH_CHECK(!has_ids);
   clearCallbacks();
 }


@@ -144,7 +144,9 @@ TEST(RecordFunctionTest, CallOrder) {
 #undef REGISTER_CALLBACK
   RECORD_FUNCTION("Outer", {});
-  { RECORD_FUNCTION("Inner", {}); }
+  {
+    RECORD_FUNCTION("Inner", {});
+  }
   at::clearCallbacks();
   ASSERT_FALSE(at::hasCallbacks());


@@ -716,14 +716,13 @@ TEST(ExternalCall, UnaryFloat) {
       std::string,
       std::vector<ExprHandle>>;
   std::vector<Test> tests = {};
-  tests.push_back(Test{// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
-                       {1, 64, 8, 9},
-                       {1, 64, 5, 7},
-                       [](at::Tensor x) {
-                         return at::adaptive_avg_pool2d(x, {5, 7});
-                       },
-                       "nnc_aten_adaptive_avg_pool2d",
-                       toExprHandleVec({5, 7})});
+  tests.push_back(Test{
+      // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
+      {1, 64, 8, 9},
+      {1, 64, 5, 7},
+      [](at::Tensor x) { return at::adaptive_avg_pool2d(x, {5, 7}); },
+      "nnc_aten_adaptive_avg_pool2d",
+      toExprHandleVec({5, 7})});
   tests.push_back(Test{// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
       {100, 200},
       {100},


@@ -8,16 +8,16 @@
   ],
   "clang-format": {
     "Darwin-arm": {
-      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-arm/17.0.6/clang-format",
-      "hash": "47c47f3c8275fd6e25d07128ef9a655d3f898eae6a59a7c7a801967871bdb2f7"
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-arm/19.1.4/clang-format",
+      "hash": "f0da3ecf0ab1e9b50e8c27bd2d7ca0baa619e2f4b824b35d79d46356581fa552"
     },
     "Darwin-i386": {
-      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-i386/17.0.6/clang-format",
-      "hash": "23423cbe62feb535c05c88e6f47e420ca2777603f90dff4d33d19b6f5177a79e"
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-i386/19.1.4/clang-format",
+      "hash": "f5eb5037b9aa9d1d2de650fb2e0fe1a2517768a462fae8e98791a67b698302f4"
     },
     "Linux": {
-      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/17.0.6/clang-format",
-      "hash": "920159a0fafc7c65f6819e8a0b739ecc8e655f50f20a3a1db975a3473b86431b"
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/19.1.4/clang-format",
+      "hash": "bfa9ef6eccb372f79ffcb6196af966fd84519ea9567f5ae7b6ad30208cd82109"
     }
   },
   "clang-tidy": {


@@ -37,7 +37,7 @@ using namespace torch;
     auto _w = \
         write(STDERR_FILENO, ERROR_MSG, sizeof(ERROR_MSG) / sizeof(char)); \
     (void)_w; \
-    struct sigaction sa {}; \
+    struct sigaction sa{}; \
     sa.sa_handler = SIG_DFL; \
     sa.sa_flags = 0; \
     if (sigemptyset(&sa.sa_mask) != 0 || \
@@ -54,7 +54,7 @@ static void setSignalHandler(
     int signal,
     void (*handler)(int, siginfo_t*, void*),
     struct sigaction* old_sa_ptr) {
-  struct sigaction sa {};
+  struct sigaction sa{};
   sa.sa_sigaction = handler;
   sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_NOCLDSTOP | SA_NODEFER;
   if (sigemptyset(&sa.sa_mask) != 0 ||
@@ -92,7 +92,7 @@ static void handler_SIGTERM(int sig, siginfo_t* info, void* ctx) {
   if (info->si_pid == getppid()) {
     _exit(EXIT_SUCCESS);
   }
-  struct sigaction sa {};
+  struct sigaction sa{};
   sa.sa_handler = SIG_DFL;
   sa.sa_flags = 0;
   if (sigemptyset(&sa.sa_mask) != 0 || sigaction(SIGTERM, &sa, nullptr) != 0) {


@@ -40,10 +40,9 @@ struct has_forward {
 template <typename Head = void, typename... Tail>
 constexpr bool check_not_lvalue_references() {
-  return (
-      !std::is_lvalue_reference_v<Head> ||
-      std::is_const_v<std::remove_reference_t<
-          Head>>)&&check_not_lvalue_references<Tail...>();
+  return (!std::is_lvalue_reference_v<Head> ||
+          std::is_const_v<std::remove_reference_t<Head>>) &&
+      check_not_lvalue_references<Tail...>();
 }
 template <>


@@ -59,7 +59,7 @@ namespace {
 // in data parallel, and should not be exposed as a user API.
 struct ReduceAdd : public autograd::Node {
   explicit ReduceAdd(const at::Device& destination_device)
-      : destination_device_(destination_device){};
+      : destination_device_(destination_device) {};
   ~ReduceAdd() override = default;
   // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)


@@ -71,7 +71,7 @@ template <
     typename T,
     typename ValueT,
     typename ParamsT,
-    ValueT ParamsT::*ptr,
+    ValueT ParamsT::* ptr,
     typename ConvertArgT,
     PyObject* (*Convert)(ConvertArgT)>
 static PyObject* getTupleAttr(PyObject* obj, void* _unused) {
@@ -93,7 +93,7 @@ template <
     typename T,
     typename ValueT,
     typename ParamsT,
-    ValueT ParamsT::*ptr,
+    ValueT ParamsT::* ptr,
     typename ConvertArgT,
     PyObject* (*Convert)(ConvertArgT)>
 static PyObject* getValueAttr(PyObject* obj, void* _unused) {


@@ -41,10 +41,10 @@ struct TORCH_API JitDecompInterface {
 TORCH_API void setJitDecompImpl(JitDecompInterface* impl);
 TORCH_API JitDecompInterface* getJitDecompImpl();
-struct TORCH_API JitDecompRegisterer {
-  explicit JitDecompRegisterer(JitDecompInterface* impl) {
-    setJitDecompImpl(impl);
-  }
-};
+struct TORCH_API JitDecompRegisterer{explicit JitDecompRegisterer(
+    JitDecompInterface * impl){setJitDecompImpl(impl);
+} // namespace torch::autograd::impl
+}
+;
 } // namespace torch::autograd::impl


@@ -1625,7 +1625,7 @@ using setter = int (*)(PyObject*, PyObject*, void*);
 namespace {
-template <PyObject* THPFunction::*ptr>
+template <PyObject* THPFunction::* ptr>
 PyObject* getObject(PyObject* obj, void* _unused) {
   auto self = (THPFunction*)obj;
   PyObject* value = self->*ptr;
@@ -1636,7 +1636,7 @@ PyObject* getObject(PyObject* obj, void* _unused) {
   return value;
 }
-template <PyObject* THPFunction::*ptr>
+template <PyObject* THPFunction::* ptr>
 int setObject(PyObject* obj, PyObject* value, void* _unused) {
   auto self = (THPFunction*)obj;
   if (value == Py_None) {
@@ -1648,13 +1648,13 @@ int setObject(PyObject* obj, PyObject* value, void* _unused) {
   return 0;
 }
-template <typename M, M THPFunction::*ptr, PyObject* (*Convert)(long)>
+template <typename M, M THPFunction::* ptr, PyObject* (*Convert)(long)>
 PyObject* getMember(PyObject* obj, void* _unused) {
   auto self = (THPFunction*)obj;
   return Convert(self->*ptr);
 }
-template <typename M, M autograd::Node::*ptr, PyObject* (*Convert)(long)>
+template <typename M, M autograd::Node::* ptr, PyObject* (*Convert)(long)>
 PyObject* getImplMember(PyObject* obj, void* _unused) {
   auto self = (THPFunction*)obj;
   return Convert(self->cdata.*ptr);


@@ -37,7 +37,7 @@ class CUDASymmetricMemory : public SymmetricMemory {
       int rank,
       int world_size);
-  ~CUDASymmetricMemory() override{};
+  ~CUDASymmetricMemory() override {};
   std::vector<void*> get_buffer_ptrs() override;
   std::vector<void*> get_signal_pad_ptrs() override;


@@ -85,14 +85,16 @@ void IpcChannel::send_fd(int dst_pid, int fd) {
   memset(cbuf, 0, sizeof(cbuf));
   // Create message header
-  struct msghdr msg {
+  struct msghdr msg{
       // destination socket address and size of it
       // message content in msg_iov and number of such structs (1 in our case)
       // auxiliary data with the value of fd and size of it
-    .msg_name = (void*)&addr, .msg_namelen = sizeof(struct sockaddr_un),
-    .msg_iov = &io, .msg_iovlen = 1, .msg_control = cbuf,
-    .msg_controllen = sizeof(cbuf)
-  };
+      .msg_name = (void*)&addr,
+      .msg_namelen = sizeof(struct sockaddr_un),
+      .msg_iov = &io,
+      .msg_iovlen = 1,
+      .msg_control = cbuf,
+      .msg_controllen = sizeof(cbuf)};
   // This points to the first control message header
   // With SCM_RIGHTS we let the kernel know that we are passing file


@@ -136,25 +136,26 @@ IMPL_REDUCE(CPU)
 IMPL_REDUCE(CUDA)
 IMPL_REDUCE(PrivateUse1)
 #define IMPL_BROADCAST(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   broadcast_##DEV( \
       at::TensorList tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       int64_t root_rank, \
       int64_t root_tensor, \
       bool asyncOp, \
       int64_t timeout) { \
     auto tensor_vec = tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> broadcast( \
-        tensor_vec, \
-        BroadcastOptions{ \
-            root_rank, \
-            root_tensor, \
-            std::chrono::milliseconds(timeout), \
-            asyncOp}); \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->broadcast( \
+                        tensor_vec, \
+                        BroadcastOptions{ \
+                            root_rank, \
+                            root_tensor, \
+                            std::chrono::milliseconds(timeout), \
+                            asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(tensor_vec), work); \
   }
 IMPL_BROADCAST(CPU)
@@ -164,22 +165,25 @@ IMPL_BROADCAST(PrivateUse1)
 // Return input tensors as output tensors to make inplace allreduce look like
 // a functional API, so that make_fx can correctly build the dependencies in
 // the graph later.
 #define IMPL_ALLREDUCE(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   allreduce_##DEV( \
       at::TensorList tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       const c10::intrusive_ptr<ReduceOp>& reduce_op, \
       const std::optional<at::Tensor>& sparse_indices, \
       bool asyncOp, \
       int64_t timeout) { \
     auto tensor_vec = tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> allreduce( \
-        tensor_vec, \
-        AllreduceOptions{ \
-            *reduce_op.get(), std::chrono::milliseconds(timeout), asyncOp}); \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->allreduce( \
+                        tensor_vec, \
+                        AllreduceOptions{ \
+                            *reduce_op.get(), \
+                            std::chrono::milliseconds(timeout), \
+                            asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(tensor_vec), work); \
   }
 IMPL_ALLREDUCE(CPU)
@@ -217,10 +221,13 @@ IMPL_ALLREDUCE_COALESCED(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto input_tensors_vec = input_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> allgather( \
-        const_cast<std::vector<std::vector<at::Tensor>>&>(output_tensors), \
-        input_tensors_vec, \
-        AllgatherOptions{std::chrono::milliseconds(timeout), asyncOp}); \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->allgather( \
+                        const_cast<std::vector<std::vector<at::Tensor>>&>( \
+                            output_tensors), \
+                        input_tensors_vec, \
+                        AllgatherOptions{ \
+                            std::chrono::milliseconds(timeout), asyncOp}); \
     return std:: \
         tuple<std::vector<std::vector<at::Tensor>>, c10::intrusive_ptr<Work>>( \
             output_tensors, work); \
@@ -231,20 +238,21 @@ IMPL_ALLGATHER(CPU)
 IMPL_ALLGATHER(CUDA)
 IMPL_ALLGATHER(PrivateUse1)
 #define IMPL__ALLGATHER_BASE(DEV) \
   std::tuple<at::Tensor, c10::intrusive_ptr<Work>> _allgather_base_##DEV( \
       at::Tensor& output_tensor, \
       at::Tensor& input_tensor, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       bool asyncOp, \
       int64_t timeout) { \
-    auto work = \
-        process_group->getBackend(c10::DeviceType::DEV) -> _allgather_base( \
-            output_tensor, \
-            input_tensor, \
-            AllgatherOptions{std::chrono::milliseconds(timeout), asyncOp}); \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->_allgather_base( \
+                        output_tensor, \
+                        input_tensor, \
+                        AllgatherOptions{ \
+                            std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<at::Tensor, c10::intrusive_ptr<Work>>( \
         output_tensor, work); \
   }
 IMPL__ALLGATHER_BASE(CPU)
@@ -289,26 +297,27 @@ IMPL_ALLGATHER_INTO_TENSOR_COALESCED(CPU)
 IMPL_ALLGATHER_INTO_TENSOR_COALESCED(CUDA)
 IMPL_ALLGATHER_INTO_TENSOR_COALESCED(PrivateUse1)
 #define IMPL_REDUCE_SCATTER(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   reduce_scatter_##DEV( \
       const at::TensorList& output_tensors, \
       const std::vector<std::vector<at::Tensor>>& input_tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       const c10::intrusive_ptr<ReduceOp>& reduce_op, \
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
-    auto work = \
-        process_group->getBackend(c10::DeviceType::DEV) -> reduce_scatter( \
-            output_tensors_vec, \
-            const_cast<std::vector<std::vector<at::Tensor>>&>(input_tensors), \
-            ReduceScatterOptions{ \
-                *reduce_op.get(), \
-                std::chrono::milliseconds(timeout), \
-                asyncOp}); \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->reduce_scatter( \
+                        output_tensors_vec, \
+                        const_cast<std::vector<std::vector<at::Tensor>>&>( \
+                            input_tensors), \
+                        ReduceScatterOptions{ \
+                            *reduce_op.get(), \
+                            std::chrono::milliseconds(timeout), \
+                            asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         output_tensors_vec, work); \
   }
 IMPL_REDUCE_SCATTER(CPU)
@@ -324,7 +333,7 @@ IMPL_REDUCE_SCATTER(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto work = process_group->getBackend(c10::DeviceType::DEV) \
-                    -> _reduce_scatter_base( \
+                    ->_reduce_scatter_base( \
                         output_tensor, \
                         input_tensor, \
                         ReduceScatterOptions{ \
@@ -393,11 +402,14 @@ IMPL_GATHER(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> scatter( \
-        output_tensors_vec, \
-        const_cast<std::vector<std::vector<at::Tensor>>&>(input_tensors), \
-        ScatterOptions{ \
-            root_rank, std::chrono::milliseconds(timeout), asyncOp}); \
+    auto work = \
+        process_group->getBackend(c10::DeviceType::DEV) \
+            ->scatter( \
+                output_tensors_vec, \
+                const_cast<std::vector<std::vector<at::Tensor>>&>( \
+                    input_tensors), \
+                ScatterOptions{ \
+                    root_rank, std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(output_tensors_vec), work); \
   }
@@ -406,22 +418,24 @@ IMPL_SCATTER(CPU)
 IMPL_SCATTER(CUDA)
 IMPL_SCATTER(PrivateUse1)
 #define IMPL_ALLTOALL(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   alltoall_##DEV( \
       const at::TensorList& output_tensors, \
       const at::TensorList& input_tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
     auto input_tensors_vec = input_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> alltoall( \
-        output_tensors_vec, \
-        input_tensors_vec, \
-        AllToAllOptions{std::chrono::milliseconds(timeout), asyncOp}); \
+    auto work = \
+        process_group->getBackend(c10::DeviceType::DEV) \
+            ->alltoall( \
+                output_tensors_vec, \
+                input_tensors_vec, \
+                AllToAllOptions{std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(output_tensors_vec), work); \
   }
 IMPL_ALLTOALL(CPU)



@@ -435,7 +435,7 @@ void socketInitialize() {
 // gracefully fall back to an alternative if it doesn't.
 bool doesHostnameResolveToUsableAddress(const std::string& hostname) {
   socketInitialize();
-  struct addrinfo hints {};
+  struct addrinfo hints{};
   hints.ai_family = AF_UNSPEC;
   hints.ai_socktype = SOCK_STREAM;
   struct addrinfo* result = nullptr;
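
One pattern recurs in many of the hunks below: clang-format 19 no longer puts a space between a declared variable and an empty braced initializer. Schematically (illustrative snippet, not taken from the tree):

    struct stat st {};  // clang-format 18 and earlier
    struct stat st{};   // clang-format 19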

@@ -155,7 +155,7 @@ class UvTcpSocket : public UvHandle {
   }
 
   void startRead() {
-    struct ::sockaddr_storage addr {};
+    struct ::sockaddr_storage addr{};
     int addrLen{sizeof(struct ::sockaddr_storage)};
 
     if (int err = uv_tcp_getpeername(

@@ -263,7 +263,7 @@ class UvTcpServer : public UvTcpSocket {
     auto res = c10::make_intrusive<UvTcpServer>(loop);
     res->handleReady();
     try {
-      struct sockaddr_storage addr {};
+      struct sockaddr_storage addr{};
       int uv_res = 0;
       if (useIpv6) {
         uv_res = uv_ip6_addr("::", port, (struct sockaddr_in6*)&addr);

@@ -40,5 +40,5 @@ RegisterHandler tracebackHandler{
       res.setContent(std::move(file_contents), "text/plain");
     }};
-}
+} // namespace
 } // namespace c10d::control_plane

@@ -631,8 +631,8 @@ PyObject* rpc_init(PyObject* _unused, PyObject* noargs) {
               py::call_guard<py::gil_scoped_release>())
           .def(
               "_get_device_map",
-              (DeviceMap(TensorPipeAgent::*)(const WorkerInfo& dst) const) &
-                  TensorPipeAgent::getDeviceMap,
+              (DeviceMap(TensorPipeAgent::*)(const WorkerInfo& dst)
+                   const)&TensorPipeAgent::getDeviceMap,
               py::call_guard<py::gil_scoped_release>())
           .def(
               "_get_backend_options",

@@ -14,7 +14,7 @@ constexpr auto kInternalModule = "torch.distributed.rpc.internal";
 #define PROFILE_GIL_SCOPED_ACQUIRE \
   std::chrono::time_point<std::chrono::high_resolution_clock> startTime; \
   auto shouldProfileGIL = \
-      RpcAgent::getCurrentRpcAgent() -> isGILProfilingEnabled(); \
+      RpcAgent::getCurrentRpcAgent()->isGILProfilingEnabled(); \
   if (shouldProfileGIL) { \
     startTime = std::chrono::high_resolution_clock::now(); \
   } \

@@ -39,7 +39,7 @@ bool file_exists(const std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }

@@ -217,7 +217,7 @@ bool recursive_rmdir(const std::string& path) {
   }
 
   struct dirent* entry = nullptr;
-  struct stat statbuf {};
+  struct stat statbuf{};
   bool success = true;
 
   // Iterate through directory entries

@@ -17,7 +17,7 @@ bool file_exists(std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }

@@ -123,13 +123,13 @@ getAOTIModelRunnerRegistry();
 // To register a new external backend in AOTI one needs to create an instance of
 // this struct. It is not thread-safe. Becase it is expected to be called during
 // the initialization of the program.
-struct TORCH_API RegisterAOTIModelRunner {
-  RegisterAOTIModelRunner(
-      const std::string& name,
-      CreateAOTIModelRunnerFunc create_aoti_model_runner_fn) {
-    getAOTIModelRunnerRegistry()[name] = create_aoti_model_runner_fn;
-  }
-};
+struct TORCH_API RegisterAOTIModelRunner{RegisterAOTIModelRunner(
+    const std::string& name,
+    CreateAOTIModelRunnerFunc create_aoti_model_runner_fn){
+    getAOTIModelRunnerRegistry()[name] = create_aoti_model_runner_fn;
+} // namespace torch::inductor
+}
+;
 
 } // namespace torch::inductor
 #endif
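
The comment above explains the intent: registration happens by constructing one of these objects during program initialization. A minimal usage sketch, with an invented backend name and factory function (neither is a real PyTorch symbol):

    // Hypothetical: register "my_device" at static-initialization time.
    static torch::inductor::RegisterAOTIModelRunner register_my_device(
        "my_device", &CreateMyDeviceModelRunner);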

@@ -87,7 +87,7 @@ bool file_exists(std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }

@@ -358,7 +358,7 @@ struct IndexValueVec {
     index = at::vec::VectorizedN<int64_t, NI>(0);
   };
-  IndexValueVec(){};
+  IndexValueVec() {};
 };
 
 template <

@@ -451,7 +451,7 @@ std::array<PyMethodDef, 2> StaticCudaLauncherMethods = {
 // We don't implement __new__ or __init__ because we're using it only as a
 // container for static methods.
 PyTypeObject StaticCudaLauncherType = {
     PyVarObject_HEAD_INIT(nullptr, 0)
     "torch._C._StaticCudaLauncher", // tp_name
     sizeof(PyObject), // tp_basicsize
     0, // tp_itemsize
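
As the comment says, the type exists only as a container for static methods. The general CPython pattern looks like this (a generic sketch, not the actual launcher code; requires <Python.h>):

    static PyObject* do_nothing(PyObject* /*self*/, PyObject* /*args*/) {
      Py_RETURN_NONE;
    }
    static PyMethodDef kExampleMethods[] = {
        {"do_nothing", do_nothing, METH_STATIC | METH_NOARGS, nullptr},
        {nullptr, nullptr, 0, nullptr}};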

@@ -45,12 +45,12 @@ TORCH_API void registerFusionBackend(
     at::Device::Type backend_type,
     FusedKernelConstructor ctor);
 TORCH_API bool hasFusionBackend(at::Device::Type backend_type);
 
-struct TORCH_API RegisterFusionBackend {
-  RegisterFusionBackend(
-      at::Device::Type backend_type,
-      FusedKernelConstructor ctor) {
-    registerFusionBackend(backend_type, std::move(ctor));
-  }
-};
+struct TORCH_API RegisterFusionBackend{RegisterFusionBackend(
+    at::Device::Type backend_type,
+    FusedKernelConstructor ctor){
+    registerFusionBackend(backend_type, std::move(ctor));
+} // namespace torch::jit::fuser
+}
+;
 
 } // namespace torch::jit::fuser
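
This is the same registration-helper idiom as RegisterAOTIModelRunner above: a namespace-scope instance runs the constructor at startup. A usage sketch with an invented constructor function:

    // Hypothetical: install a fused-kernel constructor for CPU at startup.
    static torch::jit::fuser::RegisterFusionBackend register_cpu_fuser(
        at::kCPU, MakeMyFusedKernel);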

@@ -134,7 +134,7 @@ static inline std::tuple<std::shared_ptr<char>, size_t> get_file_content(
     // failed to open file, chances are it's no such file or directory.
     file_not_found_error();
   }
-  struct stat statbuf {};
+  struct stat statbuf{};
   fstat(fd, &statbuf);
   size_t size = statbuf.st_size;
   void* ptr = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

@@ -131,153 +131,153 @@ namespace torch::jit::tensorexpr {
 c10::ArrayRef<SymbolAddress> getIntrinsicSymbols() {
   static SymbolAddress symbolAddresses[] = {
       {"log10f", reinterpret_cast<void*>(&log10f)},
       {"log1pf", reinterpret_cast<void*>(&log1pf)},
       {"logf", reinterpret_cast<void*>(&logf)},
       {"log2f", reinterpret_cast<void*>(&log2f)},
       {"expf", reinterpret_cast<void*>(&expf)},
       {"erff", reinterpret_cast<void*>(&erff)},
       {"cosf", reinterpret_cast<void*>(&cosf)},
       {"sinf", reinterpret_cast<void*>(&sinf)},
       {"tanf", reinterpret_cast<void*>(&tanf)},
       {"acosf", reinterpret_cast<void*>(&acosf)},
       {"asinf", reinterpret_cast<void*>(&asinf)},
       {"atanf", reinterpret_cast<void*>(&atanf)},
       {"coshf", reinterpret_cast<void*>(&coshf)},
       {"sinhf", reinterpret_cast<void*>(&sinhf)},
       {"tanhf", reinterpret_cast<void*>(&tanhf)},
       {"sqrtf", reinterpret_cast<void*>(&sqrtf)},
       {"fabsf", reinterpret_cast<void*>(&fabsf)},
       {"floorf", reinterpret_cast<void*>(&floorf)},
       {"ceilf", reinterpret_cast<void*>(&ceilf)},
       {"roundf", reinterpret_cast<void*>(&roundf)},
       {"truncf", reinterpret_cast<void*>(&truncf)},
       {"atan2f", reinterpret_cast<void*>(&atan2f)},
       {"fmodf", reinterpret_cast<void*>(&fmodf)},
       {"remainderf", reinterpret_cast<void*>(&remainderf)},
       // float -> half & half -> float conversions
       {"__gnu_h2f_ieee",
        reinterpret_cast<void*>(&c10::detail::fp16_ieee_to_fp32_value)},
       {"__gnu_f2h_ieee",
        reinterpret_cast<void*>(&c10::detail::fp16_ieee_from_fp32_value)},
 #if !defined(_MSC_VER) && defined(__x86_64__)
       // FP32 Sleef functions -- SSE
       {"Sleef_acosf4", reinterpret_cast<void*>(&Sleef_acosf4_u10)},
       {"Sleef_asinf4", reinterpret_cast<void*>(&Sleef_asinf4_u10)},
       {"Sleef_atanf4", reinterpret_cast<void*>(&Sleef_atanf4_u10)},
       {"Sleef_cosf4", reinterpret_cast<void*>(&Sleef_cosf4_u10)},
       {"Sleef_sinf4", reinterpret_cast<void*>(&Sleef_sinf4_u10)},
       {"Sleef_tanf4", reinterpret_cast<void*>(&Sleef_tanf4_u10)},
       {"Sleef_coshf4", reinterpret_cast<void*>(&Sleef_coshf4_u10)},
       {"Sleef_sinhf4", reinterpret_cast<void*>(&Sleef_sinhf4_u10)},
       {"Sleef_tanhf4", reinterpret_cast<void*>(&Sleef_tanhf4_u10)},
       {"Sleef_erff4", reinterpret_cast<void*>(&Sleef_erff4_u10)},
       {"Sleef_erfcf4", reinterpret_cast<void*>(&Sleef_erfcf4_u15)},
       {"Sleef_expf4", reinterpret_cast<void*>(&Sleef_expf4_u10)},
       {"Sleef_expm1f4", reinterpret_cast<void*>(&Sleef_expm1f4_u10)},
       {"Sleef_logf4", reinterpret_cast<void*>(&Sleef_logf4_u10)},
       {"Sleef_log2f4", reinterpret_cast<void*>(&Sleef_log2f4_u10)},
       {"Sleef_log10f4", reinterpret_cast<void*>(&Sleef_log10f4_u10)},
       {"Sleef_log1pf4", reinterpret_cast<void*>(&Sleef_log1pf4_u10)},
       {"Sleef_sqrtf4", reinterpret_cast<void*>(&Sleef_sqrtf4_u05)},
       {"Sleef_fabsf4", reinterpret_cast<void*>(&Sleef_fabsf4)},
       {"Sleef_floorf4", reinterpret_cast<void*>(&Sleef_floorf4)},
       {"Sleef_ceilf4", reinterpret_cast<void*>(&Sleef_ceilf4)},
       {"Sleef_truncf4", reinterpret_cast<void*>(&Sleef_truncf4)},
       {"Sleef_roundf4", reinterpret_cast<void*>(&Sleef_roundf4)},
       {"Sleef_lgammaf4", reinterpret_cast<void*>(&Sleef_lgammaf4_u10)},
       {"Sleef_atan2f4", reinterpret_cast<void*>(&Sleef_atan2f4_u10)},
       {"Sleef_powf4", reinterpret_cast<void*>(&Sleef_powf4_u10)},
       {"Sleef_fmodf4", reinterpret_cast<void*>(&Sleef_fmodf4)},
       // FP32 Sleef functions -- AVX2
       {"Sleef_acosf8", reinterpret_cast<void*>(&Sleef_acosf8_u10)},
       {"Sleef_asinf8", reinterpret_cast<void*>(&Sleef_asinf8_u10)},
       {"Sleef_atanf8", reinterpret_cast<void*>(&Sleef_atanf8_u10)},
       {"Sleef_cosf8", reinterpret_cast<void*>(&Sleef_cosf8_u10)},
       {"Sleef_sinf8", reinterpret_cast<void*>(&Sleef_sinf8_u10)},
       {"Sleef_tanf8", reinterpret_cast<void*>(&Sleef_tanf8_u10)},
       {"Sleef_coshf8", reinterpret_cast<void*>(&Sleef_coshf8_u10)},
       {"Sleef_sinhf8", reinterpret_cast<void*>(&Sleef_sinhf8_u10)},
       {"Sleef_tanhf8", reinterpret_cast<void*>(&Sleef_tanhf8_u10)},
       {"Sleef_erff8", reinterpret_cast<void*>(&Sleef_erff8_u10)},
       {"Sleef_erfcf8", reinterpret_cast<void*>(&Sleef_erfcf8_u15)},
       {"Sleef_expf8", reinterpret_cast<void*>(&Sleef_expf8_u10)},
       {"Sleef_expm1f8", reinterpret_cast<void*>(&Sleef_expm1f8_u10)},
       {"Sleef_logf8", reinterpret_cast<void*>(&Sleef_logf8_u10)},
       {"Sleef_log2f8", reinterpret_cast<void*>(&Sleef_log2f8_u10)},
       {"Sleef_log10f8", reinterpret_cast<void*>(&Sleef_log10f8_u10)},
       {"Sleef_log1pf8", reinterpret_cast<void*>(&Sleef_log1pf8_u10)},
       {"Sleef_sqrtf8", reinterpret_cast<void*>(&Sleef_sqrtf8_u05)},
       {"Sleef_fabsf8", reinterpret_cast<void*>(&Sleef_fabsf8)},
       {"Sleef_floorf8", reinterpret_cast<void*>(&Sleef_floorf8)},
       {"Sleef_ceilf8", reinterpret_cast<void*>(&Sleef_ceilf8)},
       {"Sleef_truncf8", reinterpret_cast<void*>(&Sleef_truncf8)},
       {"Sleef_roundf8", reinterpret_cast<void*>(&Sleef_roundf8)},
       {"Sleef_lgammaf8", reinterpret_cast<void*>(&Sleef_lgammaf8_u10)},
       {"Sleef_atan2f8", reinterpret_cast<void*>(&Sleef_atan2f8_u10)},
       {"Sleef_powf8", reinterpret_cast<void*>(&Sleef_powf8_u10)},
       {"Sleef_fmodf8", reinterpret_cast<void*>(&Sleef_fmodf8)},
       // FP64 Sleef functions -- SSE
       {"Sleef_acosd2", reinterpret_cast<void*>(&Sleef_acosd2_u10)},
       {"Sleef_asind2", reinterpret_cast<void*>(&Sleef_asind2_u10)},
       {"Sleef_atand2", reinterpret_cast<void*>(&Sleef_atand2_u10)},
       {"Sleef_cosd2", reinterpret_cast<void*>(&Sleef_cosd2_u10)},
       {"Sleef_sind2", reinterpret_cast<void*>(&Sleef_sind2_u10)},
       {"Sleef_tand2", reinterpret_cast<void*>(&Sleef_tand2_u10)},
       {"Sleef_coshd2", reinterpret_cast<void*>(&Sleef_coshd2_u10)},
       {"Sleef_sinhd2", reinterpret_cast<void*>(&Sleef_sinhd2_u10)},
       {"Sleef_tanhd2", reinterpret_cast<void*>(&Sleef_tanhd2_u10)},
       {"Sleef_erfd2", reinterpret_cast<void*>(&Sleef_erfd2_u10)},
       {"Sleef_erfcd2", reinterpret_cast<void*>(&Sleef_erfcd2_u15)},
       {"Sleef_expd2", reinterpret_cast<void*>(&Sleef_expd2_u10)},
       {"Sleef_expm1d2", reinterpret_cast<void*>(&Sleef_expm1d2_u10)},
       {"Sleef_logd2", reinterpret_cast<void*>(&Sleef_logd2_u10)},
       {"Sleef_log2d2", reinterpret_cast<void*>(&Sleef_log2d2_u10)},
       {"Sleef_log10d2", reinterpret_cast<void*>(&Sleef_log10d2_u10)},
       {"Sleef_log1pd2", reinterpret_cast<void*>(&Sleef_log1pd2_u10)},
       {"Sleef_sqrtd2", reinterpret_cast<void*>(&Sleef_sqrtd2_u05)},
       {"Sleef_fabsd2", reinterpret_cast<void*>(&Sleef_fabsd2)},
       {"Sleef_floord2", reinterpret_cast<void*>(&Sleef_floord2)},
       {"Sleef_ceild2", reinterpret_cast<void*>(&Sleef_ceild2)},
       {"Sleef_truncd2", reinterpret_cast<void*>(&Sleef_truncd2)},
       {"Sleef_roundd2", reinterpret_cast<void*>(&Sleef_roundd2)},
       {"Sleef_lgammad2", reinterpret_cast<void*>(&Sleef_lgammad2_u10)},
       {"Sleef_atan2d2", reinterpret_cast<void*>(&Sleef_atan2d2_u10)},
       {"Sleef_powd2", reinterpret_cast<void*>(&Sleef_powd2_u10)},
       {"Sleef_fmodd2", reinterpret_cast<void*>(&Sleef_fmodd2)},
       // FP64 Sleef functions -- AVX2
       {"Sleef_acosd4", reinterpret_cast<void*>(&Sleef_acosd4_u10)},
       {"Sleef_asind4", reinterpret_cast<void*>(&Sleef_asind4_u10)},
       {"Sleef_atand4", reinterpret_cast<void*>(&Sleef_atand4_u10)},
       {"Sleef_cosd4", reinterpret_cast<void*>(&Sleef_cosd4_u10)},
       {"Sleef_sind4", reinterpret_cast<void*>(&Sleef_sind4_u10)},
       {"Sleef_tand4", reinterpret_cast<void*>(&Sleef_tand4_u10)},
       {"Sleef_coshd4", reinterpret_cast<void*>(&Sleef_coshd4_u10)},
       {"Sleef_sinhd4", reinterpret_cast<void*>(&Sleef_sinhd4_u10)},
       {"Sleef_tanhd4", reinterpret_cast<void*>(&Sleef_tanhd4_u10)},
       {"Sleef_erfd4", reinterpret_cast<void*>(&Sleef_erfd4_u10)},
       {"Sleef_erfcd4", reinterpret_cast<void*>(&Sleef_erfcd4_u15)},
       {"Sleef_expd4", reinterpret_cast<void*>(&Sleef_expd4_u10)},
       {"Sleef_expm1d4", reinterpret_cast<void*>(&Sleef_expm1d4_u10)},
       {"Sleef_logd4", reinterpret_cast<void*>(&Sleef_logd4_u10)},
       {"Sleef_log2d4", reinterpret_cast<void*>(&Sleef_log2d4_u10)},
       {"Sleef_log10d4", reinterpret_cast<void*>(&Sleef_log10d4_u10)},
       {"Sleef_log1pd4", reinterpret_cast<void*>(&Sleef_log1pd4_u10)},
       {"Sleef_sqrtd4", reinterpret_cast<void*>(&Sleef_sqrtd4_u05)},
       {"Sleef_fabsd4", reinterpret_cast<void*>(&Sleef_fabsd4)},
       {"Sleef_floord4", reinterpret_cast<void*>(&Sleef_floord4)},
       {"Sleef_ceild4", reinterpret_cast<void*>(&Sleef_ceild4)},
       {"Sleef_truncd4", reinterpret_cast<void*>(&Sleef_truncd4)},
       {"Sleef_roundd4", reinterpret_cast<void*>(&Sleef_roundd4)},
       {"Sleef_lgammad4", reinterpret_cast<void*>(&Sleef_lgammad4_u10)},
       {"Sleef_atan2d4", reinterpret_cast<void*>(&Sleef_atan2d4_u10)},
       {"Sleef_powd4", reinterpret_cast<void*>(&Sleef_powd4_u10)},
       {"Sleef_fmodd4", reinterpret_cast<void*>(&Sleef_fmodd4)},
 #endif
   };
   return c10::ArrayRef<SymbolAddress>(symbolAddresses);
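
The table above maps intrinsic names to function addresses for the LLVM JIT's symbol resolution. As a rough illustration of how such a table can be consumed (the field names of SymbolAddress are assumed here, and the real resolver lives elsewhere):

    // Assumes SymbolAddress has the shape {const char* symbol; void* address;};
    // needs <cstring> for std::strcmp.
    void* findIntrinsic(c10::ArrayRef<SymbolAddress> table, const char* name) {
      for (const auto& entry : table) {
        if (std::strcmp(entry.symbol, name) == 0) {
          return entry.address;
        }
      }
      return nullptr; // not an intrinsic provided by this table
    }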

@@ -36,12 +36,11 @@ enum class C10_API_ENUM Aggregation {
   MIN = 6,
 };
 
-struct TORCH_API AggregationHash {
-  template <typename T>
-  std::size_t operator()(T t) const {
-    return static_cast<std::size_t>(t);
-  }
-};
+struct TORCH_API AggregationHash{template <typename T> std::size_t operator()(
+    T t) const {return static_cast<std::size_t>(t);
+} // namespace torch::monitor
+}
+;
 
 // aggregationName returns the human readable name corresponding to the
 // aggregation.

@@ -808,56 +808,58 @@ void generateForwardBackwardLink(
 void generateForwardBackwardLinks(
     std::unique_ptr<torch::profiler::impl::kineto::trace_t>& cpu_trace,
-    const std::vector<std::shared_ptr<Result>>& results){
+    const std::vector<std::shared_ptr<Result>>& results) {
 #ifndef USE_KINETO
 }
 #else // USE_KINETO
   TORCH_INTERNAL_ASSERT(cpu_trace->activities.size() == results.size());
 
   // startThreadId_seqNum to pointer of activity.
   // Low-16bits of startThreadId and low-48bits seqNum are concatenated into
   // one uint64_t variable as key.
-  std::unordered_map<uint64_t, libkineto::GenericTraceActivity*> tidSeq2activity;
+  std::unordered_map<uint64_t, libkineto::GenericTraceActivity*>
+      tidSeq2activity;
   uint64_t fwd_bwd_link_id = 1;
 
-  using result_activity_t = std::pair<Result*, libkineto::GenericTraceActivity*>;
+  using result_activity_t =
+      std::pair<Result*, libkineto::GenericTraceActivity*>;
   std::vector<result_activity_t> torch_events;
 
   for (const auto idx : c10::irange(cpu_trace->activities.size())) {
     auto& profiler_result = results[idx];
     auto& activity = cpu_trace->activities[idx];
 
     // add information about an associated forward op, if a sequence number
     // is available (e.g. during training)
     profiler_result->visit_if_base<ExtraFields<EventType::TorchOp>>(
         [&](const auto& e) {
           if (e.sequence_number_ >= 0) {
             torch_events.emplace_back(profiler_result.get(), activity.get());
           }
         });
   }
 
   // We need to visit the events in chronological order.
   // So we sort them by end_time_ns_ before processing.
   std::sort(
       torch_events.begin(),
       torch_events.end(),
       [](const result_activity_t& left, const result_activity_t& right) {
         auto left_end_time =
             std::get<ExtraFields<EventType::TorchOp>>(left.first->extra_fields_)
                 .end_time_ns_;
-        auto right_end_time =
-            std::get<ExtraFields<EventType::TorchOp>>(right.first->extra_fields_)
-                .end_time_ns_;
+        auto right_end_time = std::get<ExtraFields<EventType::TorchOp>>(
+                                  right.first->extra_fields_)
+                                  .end_time_ns_;
         return left_end_time < right_end_time;
       });
 
   for (auto& [profiler_result, activity] : torch_events) {
     generateForwardBackwardLink(
         *profiler_result, fwd_bwd_link_id, *activity, tidSeq2activity);
   }
 }
 #endif // USE_KINETO
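
The comments in the hunk above describe a key that packs the low 16 bits of a thread id and the low 48 bits of a sequence number into one uint64_t. One plausible packing that matches that description (an illustration only; the exact bit layout used by the profiler may differ):

    // Illustrative: thread id in the high 16 bits, sequence number below.
    uint64_t makeKey(uint64_t startThreadId, uint64_t seqNum) {
      return ((startThreadId & 0xFFFFull) << 48) | (seqNum & ((1ull << 48) - 1));
    }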

@@ -63,7 +63,7 @@ void PerfEvent::Init() {
     TORCH_CHECK(false, "Unsupported profiler event name: ", name_);
   }
 
-  struct perf_event_attr attr {};
+  struct perf_event_attr attr{};
   attr.size = sizeof(perf_event_attr);
   attr.type = it->second.first;

@@ -673,8 +673,8 @@ void initPythonBindings(PyObject* module) {
       {nullptr},
   };
 
-  static PyTypeObject RecordFunctionFast_Type = { PyVarObject_HEAD_INIT(nullptr,
-                                                      0)
-  };
+  static PyTypeObject RecordFunctionFast_Type = {
+      PyVarObject_HEAD_INIT(nullptr, 0)
+  };
 
   RecordFunctionFast_Type.tp_name = "torch._C._profiler.RecordFunctionFast",

@@ -46,7 +46,7 @@ struct MemFile {
       "failed to open {}: {}",
       filename_,
       c10::utils::str_error(errno));
-  struct stat s {};
+  struct stat s{};
   if (-1 == fstat(fd_, &s)) {
     close(fd_); // destructors don't run during exceptions
     UNWIND_CHECK(

@@ -101,11 +101,10 @@
 #define THPBoolUtils_newReal(value) THPUtils_newReal_BOOL(value)
 #define THPBoolUtils_checkAccreal(object) THPUtils_checkReal_BOOL(object)
 #define THPBoolUtils_unpackAccreal(object) \
-  (int64_t) THPUtils_unpackReal_BOOL(object)
+  (int64_t)THPUtils_unpackReal_BOOL(object)
 #define THPBoolUtils_newAccreal(value) THPUtils_newReal_BOOL(value)
 #define THPLongUtils_checkReal(object) THPUtils_checkReal_INT(object)
-#define THPLongUtils_unpackReal(object) \
-  (int64_t) THPUtils_unpackReal_INT(object)
+#define THPLongUtils_unpackReal(object) (int64_t)THPUtils_unpackReal_INT(object)
 #define THPLongUtils_newReal(value) THPUtils_newReal_INT(value)
 #define THPIntUtils_checkReal(object) THPUtils_checkReal_INT(object)
 #define THPIntUtils_unpackReal(object) (int)THPUtils_unpackReal_INT(object)

@@ -362,7 +362,7 @@ TORCH_API void THP_encodeBuffer<c10::complex<double>>(
 #define DEFINE_ENCODE(TYPE) \
   template TORCH_API void THP_encodeBuffer<TYPE>( \
-      uint8_t * dst, const TYPE* src, THPByteOrder order, size_t len);
+      uint8_t* dst, const TYPE* src, THPByteOrder order, size_t len);
 
 DEFINE_ENCODE(int16_t)
 DEFINE_ENCODE(int32_t)