[2/N] Fix clang-tidy readability checks (#164652)
This PR applies clang-tidy readability checks to the JIT sources and to all headers in the code base. `readability-redundant-inline-specifier`, which detects redundant `inline` specifiers on function and variable declarations, is suppressed because it would incur too many changes: many in-class method definitions are marked `inline`.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164652
Approved by: https://github.com/Skylion007
Committed by: PyTorch MergeBot
Parent: 331191ce4b
Commit: 9fff8155c3
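For reference, a minimal sketch (hypothetical class, not taken from this PR) of the pattern that the suppressed `readability-redundant-inline-specifier` check reports: a member function defined inside the class body is already implicitly inline, so the explicit keyword adds nothing.

// Hypothetical example of what readability-redundant-inline-specifier flags.
struct Counter {
  // The `inline` below is redundant: in-class definitions are implicitly inline.
  inline int value() const { return value_; }
  int value_ = 0;
};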
@@ -66,6 +66,7 @@ readability-simplify-subscript-expr,
 readability-string-compare,
 -readability-redundant-access-specifiers,
 -readability-redundant-control-flow,
+-readability-redundant-inline-specifier,
 '
 HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
 WarningsAsErrors: '*'
@@ -214,7 +214,7 @@ inline Tensor applySlice(
 "step must be greater than zero");

 // See NOTE [nested tensor size for indexing]
-if (self_sizes.has_value() && self_sizes.value().size() > 0) {
+if (self_sizes.has_value() && !self_sizes.value().empty()) {
 // Skip this optimization if we are tracing, as the trace may be polymorphic
 // over the shape of the `self` tensor, and we still want to record
 // the slice.
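The rewrite above is presumably the `readability-container-size-empty` transform; a standalone sketch of the same before/after pattern (hypothetical function, not PyTorch code) follows.

#include <vector>

bool has_elements(const std::vector<int>& v) {
  // Flagged form: return v.size() > 0;
  // empty() states the intent directly, which is what the check prefers.
  return !v.empty();
}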
@@ -117,7 +117,7 @@ C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) {
 template <>
 C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) {
 // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function
-return median + sigma * at::tan(c10::pi<double> * (val - static_cast<double>(0.5)));
+return median + sigma * at::tan(c10::pi<double> * (val - 0.5));
 }

 /**

@@ -2,7 +2,7 @@

 namespace c10 {

-inline BoxedKernel::BoxedKernel() : functor_(), boxed_kernel_func_(nullptr) {}
+inline BoxedKernel::BoxedKernel() : boxed_kernel_func_(nullptr) {}

 inline BoxedKernel::BoxedKernel(
 std::unique_ptr<OperatorKernel> functor,

@@ -20,9 +20,7 @@ make_unique_base(Args&&... args) {
 } // namespace detail

 inline KernelFunction::KernelFunction()
-: boxed_kernel_func_(),
-unboxed_kernel_func_(nullptr),
-sym_unboxed_kernel_func_(nullptr) {}
+: unboxed_kernel_func_(nullptr), sym_unboxed_kernel_func_(nullptr) {}

 inline KernelFunction::~KernelFunction() {
 if (tokens_) {
@@ -96,7 +96,7 @@ class TORCH_API Dispatcher final {
 friend class TypedOperatorHandle;

 struct Guard final {
-Guard() : alive(true), mutex() {}
+Guard() : alive(true) {}
 std::atomic<bool> alive;
 std::mutex mutex;
 };

@@ -114,7 +114,7 @@ constexpr bool allowlist_contains(std::string_view allowlist, std::string_view i
 }
 next++;
 } else {
-if (allowlist.substr(cur).compare(item) == 0) {
+if (allowlist.substr(cur) == item) {
 return true;
 }
 break;
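The `compare(...) == 0` rewrite above matches the `readability-string-compare` check enabled in the config; a minimal sketch of the pattern (hypothetical function name) for reference.

#include <string_view>

bool is_add_op(std::string_view op) {
  // Flagged form: return op.compare("aten::add") == 0;
  // operator== expresses the equality test directly.
  return op == "aten::add";
}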
@@ -411,7 +411,6 @@ public:

 Options()
 : schemaOrName_(std::nullopt)
-, kernels()
 , aliasAnalysisKind_(std::nullopt)
 {}

@@ -420,7 +419,6 @@ public:
 struct KernelRegistrationConfig final {
 KernelRegistrationConfig()
 : dispatch_key(std::nullopt)
-, func()
 , cpp_signature(std::nullopt)
 , inferred_function_schema(nullptr)
 {}
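The two constructor hunks above drop initializers such as `kernels()` and `func()` that merely restate a member's default constructor, which is the pattern `readability-redundant-member-init` reports (my reading of the change; the PR does not name the check per hunk). A small self-contained sketch:

#include <mutex>
#include <vector>

struct Registry {
  // Flagged form: Registry() : entries_(), count_(0), mutex_() {}
  // entries_() and mutex_() repeat the members' default constructors and can be dropped.
  Registry() : count_(0) {}

  std::vector<int> entries_;
  int count_;
  std::mutex mutex_;
};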
@@ -99,8 +99,8 @@ struct CUDAGeneratorState : public c10::intrusive_ptr_target {
 uint64_t offset_intragraph_;
 bool capturing_{};
 std::unordered_set<cuda::CUDAGraph*> registered_graphs_;
-at::TensorBase seed_extragraph_{};
-at::TensorBase offset_extragraph_{};
+at::TensorBase seed_extragraph_;
+at::TensorBase offset_extragraph_;

 CUDAGeneratorState(
 uint64_t seed = default_rng_seed_val,

@@ -167,7 +167,7 @@ struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl {
 CUDAGeneratorImpl* clone_impl() const override;

 c10::intrusive_ptr<CUDAGeneratorState> state_;
-std::atomic_flag no_reset_rnn_state_{};
+std::atomic_flag no_reset_rnn_state_;
 };

 namespace cuda::detail {

@@ -122,7 +122,7 @@ struct DeviceThreadHandlePool : public std::enable_shared_from_this<DeviceThread

 // Called by the destructor. Releases this thread's handles back into the pool.
 void release() {
-if(my_handles.size() > 0) {
+if(!my_handles.empty()) {
 auto parent = weak_parent.lock();
 if (!parent) {
 // If this thread exits after atexit handlers have completed, the

@@ -47,7 +47,7 @@ int64_t compute_arange_size(const Scalar& start, const Scalar& end, const Scalar
 int64_t sgn = (xstep > 0) - (xstep < 0);
 size_d = std::ceil((xend - xstart + xstep - sgn) / xstep);
 } else {
-size_d = std::ceil(static_cast<double>(end.to<double>() - start.to<double>())
+size_d = std::ceil((end.to<double>() - start.to<double>())
 / step.to<double>());
 }

@@ -4,7 +4,6 @@

 #include <ATen/OpMathType.h>
 #include <ATen/TensorUtils.h>
-#include <ATen/OpMathType.h>
 #include <ATen/core/Tensor.h>
 #include <ATen/cpu/vec/functional.h>
 #include <ATen/cpu/vec/vec.h>
@@ -22,7 +22,7 @@ static inline void cpu_atomic_add_float(float* dst, float fvalue)
 old_value.floatV = *dst;
 new_value.floatV = old_value.floatV + fvalue;

-unsigned* old_intV = (unsigned*)(&old_value.intV);
+unsigned* old_intV = &old_value.intV;
 while (!std::atomic_compare_exchange_strong(dst_intV, old_intV, new_value.intV)) {
 #ifdef __aarch64__
 __asm__ __volatile__("yield;" : : : "memory");

@@ -118,7 +118,7 @@ gemm_notrans_(
 scale_(m, n, beta, c, ldc);

 // c += alpha * (a @ b)
-const uint64_t unsigned_m = static_cast<int64_t>(m);
+const uint64_t unsigned_m = m;
 const uint64_t i_m = unsigned_m / 4;
 for (const uint64_t l : c10::irange(k)) {
 for (const uint64_t j : c10::irange(n)) {

@@ -8,7 +8,6 @@
 #include <c10/util/irange.h>
 #include <ATen/OpMathType.h>
 #include <ATen/native/cpu/utils.h>
-#include <ATen/OpMathType.h>

 namespace at::native {
 inline namespace CPU_CAPABILITY {

@@ -17,7 +17,6 @@
 #include <ATen/cpu/vec/functional.h>
 #include <ATen/cpu/vec/vec.h>
 #include <c10/util/irange.h>
-#include <ATen/OpMathType.h>

 // [Note AVX-SSE transitions] In general we avoid calls into cmath for code
 // compiled with AVX/AVX2 This is because of SSE-AVX transitions and a bug in

@@ -240,7 +240,7 @@ static void unfolded2d_copy(
 int64_t output_height,
 int64_t output_width) {
 at::parallel_for(
-0, (int64_t)n_input_plane * kH * kW, 0, [&](int64_t start, int64_t end) {
+0, n_input_plane * kH * kW, 0, [&](int64_t start, int64_t end) {
 for (const auto k : c10::irange(start, end)) {
 int64_t nip = k / (kH * kW);
 int64_t rest = k % (kH * kW);
@@ -316,7 +316,7 @@ static void unfolded2d_copy(
 for (int64_t x = 0; x < output_width; x++)
 memcpy(
 dst + (size_t)y * output_width + x,
-src + (size_t)iy * input_width + ix + (int64_t)x * dW,
+src + (size_t)iy * input_width + ix + x * dW,
 sizeof(scalar_t) * (1));
 }
 }

@@ -906,7 +906,7 @@ static void ref_dyn_quant_matmul_4bit_channelwise_kernel(
 // Round to nearest integer
 const int32_t nudged_zero_point0 = lrintf(zero_point0);

-int8_t* dst_ptr = (int8_t*)lhs_qa8dx + m_idx * dst_stride;
+int8_t* dst_ptr = lhs_qa8dx + m_idx * dst_stride;

 // LHS offset at the beginning of the row
 *((float*)(dst_ptr)) = recip_scale0;

@@ -1048,7 +1048,7 @@ static void ref_dyn_quant_matmul_4bit_groupwise_kernel(
 zero_point0 = (std::min)(zero_point0, qmax);
 const int32_t nudged_zero_point0 = lrintf(zero_point0);

-int8_t* dst_ptr = (int8_t*)lhs_qa8dx + row_idx * dst_stride;
+int8_t* dst_ptr = lhs_qa8dx + row_idx * dst_stride;

 *((float*)(dst_ptr)) = recip_scale0;
 dst_ptr += sizeof(float);

@@ -146,12 +146,12 @@ inline TensorQuantizationParams ChooseQuantizationParams(
 // The arithmetic error on the zero point computed from either pair
 // will be roughly machine_epsilon * (sum of absolute values of terms)
 // so we want to use the variant that adds the smaller terms.
-double zero_point_from_min = qmin - min / static_cast<double>(scale);
-double zero_point_from_max = qmax - max / static_cast<double>(scale);
+double zero_point_from_min = qmin - min / scale;
+double zero_point_from_max = qmax - max / scale;
 double zero_point_from_min_error =
-std::abs(qmin) - std::abs(min / static_cast<double>(scale));
+std::abs(qmin) - std::abs(min / scale);
 double zero_point_from_max_error =
-std::abs(qmax) - std::abs(max / static_cast<double>(scale));
+std::abs(qmax) - std::abs(max / scale);
 double initial_zero_point =
 zero_point_from_min_error < zero_point_from_max_error
 ? zero_point_from_min
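Several hunks above, including the `ChooseQuantizationParams` one, remove casts whose operand already has the destination type (for example `static_cast<double>(scale)` where `scale` is already a `double`); this looks like the `readability-redundant-casting` transform, though the PR does not name the check per hunk. A minimal sketch with hypothetical names:

// Hypothetical helper illustrating a redundant cast: `scale` is already a double,
// so static_cast<double>(scale) changes nothing and can be removed.
double zero_point_from_min(double qmin, double min_val, double scale) {
  // Flagged form: return qmin - min_val / static_cast<double>(scale);
  return qmin - min_val / scale;
}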
@@ -560,7 +560,7 @@ float hsum_sq(const int32_t* A, int len) {
 alignas(64) float temp[8];
 _mm256_store_ps(temp, sum_ps);
 for (const auto k : c10::irange(8)) {
-row_sum += static_cast<float>(temp[k]);
+row_sum += temp[k];
 }
 #elif defined(CPU_CAPABILITY_AVX512)
 __m512 sum_ps = _mm512_setzero_ps();

@@ -574,7 +574,7 @@ float hsum_sq(const int32_t* A, int len) {
 alignas(64) float temp[16];
 _mm512_store_ps(temp, sum_ps);
 for (const auto k : c10::irange(16)) {
-row_sum += static_cast<float>(temp[k]);
+row_sum += temp[k];
 }
 #endif // CPU_CAPABILITY_AVX2 or CPU_CAPABILITY_AVX512

@@ -1282,7 +1282,7 @@ template <bool ReLUFused = false>
 void qadd_scalar_kernel(Tensor& out, const Tensor& self, const Scalar& other) {
 int64_t zero_point = out.q_zero_point();
 float scale = static_cast<float>(out.q_scale());
-float inv_scale = static_cast<float>(1.0f / scale);
+float inv_scale = 1.0f / scale;
 int64_t self_zero_point = self.q_zero_point();
 float self_scale = static_cast<float>(self.q_scale());

@@ -2915,7 +2915,7 @@ void fake_quantize_learnable_channel_grad_kernel_cpu(
 // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
 *dx_output = (*dy_input) * (xqi >= quant_min && xqi <= quant_max);
 // Calculate gradients for scale and zero point.
-float xfqi = static_cast<float>((std::max(std::min(xqi, quant_max), quant_min) - (*zero_point_input)) * (*scale_input));
+float xfqi = ((std::max(std::min(xqi, quant_max), quant_min) - (*zero_point_input)) * (*scale_input));
 if (xqi < quant_min || xqi > quant_max) {
 *dzero_point_output = (*dy_input) * (-1) * (*scale_input) * grad_factor;
 *dscale_output = ((xqi < quant_min) ? ((*dy_input) * dscale_small) : ((*dy_input) * dscale_big)) * grad_factor;

@@ -4415,7 +4415,7 @@ void _qmul_tensor_cpu_impl(
 uint8_t y_data = *(y_ptr + idx);
 int32_t x_val = static_cast<int32_t>(x_data) - x_zero_point;
 int32_t y_val = static_cast<int32_t>(y_data) - y_zero_point;
-int32_t out_val = static_cast<int32_t>(x_val * y_val);
+int32_t out_val = x_val * y_val;
 float out_val_f = (float)out_val * multiplier;
 if constexpr (std::is_same<T, float>::value) {
 *(out_ptr + idx) = out_val_f;
@@ -90,12 +90,12 @@ class TORCH_API TensorMaker {

 void* data_;
 IntArrayRef sizes_;
-OptionalIntArrayRef strides_{};
-std::optional<int64_t> storage_offset_{};
-std::function<void(void*)> deleter_{};
+OptionalIntArrayRef strides_;
+std::optional<int64_t> storage_offset_;
+std::function<void(void*)> deleter_;
 std::unique_ptr<void, ContextDeleter> ctx_{nullptr, detail::noopDelete};
-std::optional<Device> device_{};
-TensorOptions opts_{};
+std::optional<Device> device_;
+TensorOptions opts_;
 bool resizeable_{};
 c10::Allocator* allocator_{};
 };

@@ -182,7 +182,7 @@ class OptionalDeviceGuard {
 }

 private:
-impl::InlineOptionalDeviceGuard<impl::VirtualGuardImpl> guard_{};
+impl::InlineOptionalDeviceGuard<impl::VirtualGuardImpl> guard_;
 };

 // Note [Whither the DeviceGuard boilerplate]

@@ -143,7 +143,7 @@ struct OptionalStreamGuard {
 }

 private:
-c10::impl::InlineOptionalStreamGuard<impl::VirtualGuardImpl> guard_{};
+c10::impl::InlineOptionalStreamGuard<impl::VirtualGuardImpl> guard_;
 };

 /**

@@ -34,6 +34,7 @@ C10_DECLARE_REGISTRY(
 // Get the global PyInterpreter hooks instance
 C10_API const PyInterpreterHooksInterface& getPyInterpreterHooks();

+// Helper function to get the global interpreter
 C10_API PyInterpreter* getGlobalPyInterpreter();

 } // namespace c10::impl

@@ -137,7 +137,7 @@ struct TraceEntry {
 size_t size_;
 MempoolId_t mempool_;
 trace_time_ time_{};
-std::string compile_context_{};
+std::string compile_context_;
 };

 // Calls made by record_function will save annotations

@@ -39,7 +39,7 @@ struct AllocAligned {
 #elif defined(_MSC_VER)
 p = _aligned_malloc(sizeof(T), kGEMMLOWPCacheLineSize);
 #else
-auto res = posix_memalign((void**)&p, kGEMMLOWPCacheLineSize, sizeof(T));
+auto res = posix_memalign(&p, kGEMMLOWPCacheLineSize, sizeof(T));
 (void)res;
 #endif

@@ -686,23 +686,23 @@ struct TORCH_API MultiheadAttentionForwardFuncOptions {

 TORCH_ARG(bool, training) = true;

-TORCH_ARG(Tensor, key_padding_mask) = {};
+TORCH_ARG(Tensor, key_padding_mask);

 TORCH_ARG(bool, need_weights) = true;

-TORCH_ARG(Tensor, attn_mask) = {};
+TORCH_ARG(Tensor, attn_mask);

 TORCH_ARG(bool, use_separate_proj_weight) = false;

-TORCH_ARG(Tensor, q_proj_weight) = {};
+TORCH_ARG(Tensor, q_proj_weight);

-TORCH_ARG(Tensor, k_proj_weight) = {};
+TORCH_ARG(Tensor, k_proj_weight);

-TORCH_ARG(Tensor, v_proj_weight) = {};
+TORCH_ARG(Tensor, v_proj_weight);

-TORCH_ARG(Tensor, static_k) = {};
+TORCH_ARG(Tensor, static_k);

-TORCH_ARG(Tensor, static_v) = {};
+TORCH_ARG(Tensor, static_v);

 TORCH_ARG(bool, average_attn_weights) = true;
 };

@@ -73,9 +73,9 @@ namespace functional {
 /// F::BatchNormFuncOptions().weight(weight).bias(bias).momentum(0.1).eps(1e-05).training(false));
 /// ```
 struct TORCH_API BatchNormFuncOptions {
-TORCH_ARG(Tensor, weight) = Tensor();
+TORCH_ARG(Tensor, weight);

-TORCH_ARG(Tensor, bias) = Tensor();
+TORCH_ARG(Tensor, bias);

 TORCH_ARG(bool, training) = false;

@@ -196,7 +196,7 @@ struct ConvFuncOptions {
 using padding_t = torch::nn::detail::conv_padding_t<D>;

 /// optional bias of shape `(out_channels)`. Default: ``None``
-TORCH_ARG(torch::Tensor, bias) = Tensor();
+TORCH_ARG(torch::Tensor, bias);

 /// The stride of the convolving kernel.
 /// For a `D`-dim convolution, must be a single number or a list of `D`

@@ -352,7 +352,7 @@ namespace functional {
 template <size_t D>
 struct ConvTransposeFuncOptions {
 /// optional bias of shape `(out_channels)`. Default: ``None``
-TORCH_ARG(torch::Tensor, bias) = Tensor();
+TORCH_ARG(torch::Tensor, bias);

 /// The stride of the convolving kernel.
 /// For a `D`-dim convolution, must be a single number or a list of `D`

@@ -40,7 +40,7 @@ struct TORCH_API EmbeddingOptions {
 TORCH_ARG(bool, sparse) = false;
 /// The learnable weights of the module of shape (num_embeddings,
 /// embedding_dim)
-TORCH_ARG(torch::Tensor, _weight) = Tensor();
+TORCH_ARG(torch::Tensor, _weight);
 };

 // ============================================================================
@@ -136,7 +136,7 @@ struct TORCH_API EmbeddingBagOptions {
 TORCH_ARG(bool, sparse) = false;
 /// The learnable weights of the module of shape (num_embeddings,
 /// embedding_dim)
-TORCH_ARG(torch::Tensor, _weight) = Tensor();
+TORCH_ARG(torch::Tensor, _weight);
 /// If ``true``, `offsets` has one additional element, where the last element
 /// is equivalent to the size of `indices`. This matches the CSR format.
 TORCH_ARG(bool, include_last_offset) = false;

@@ -201,7 +201,7 @@ namespace functional {
 struct TORCH_API EmbeddingBagFuncOptions {
 /// Only used when `input` is 1D. `offsets` determines
 /// the starting index position of each bag (sequence) in `input`.
-TORCH_ARG(torch::Tensor, offsets) = Tensor();
+TORCH_ARG(torch::Tensor, offsets);
 /// If given, each embedding vector with norm larger than `max_norm` is
 /// renormalized to have norm `max_norm`.
 TORCH_ARG(std::optional<double>, max_norm) = std::nullopt;

@@ -223,7 +223,7 @@ struct TORCH_API EmbeddingBagFuncOptions {
 /// be taken to be 1. If specified, `per_sample_weights` must have exactly the
 /// same shape as input and is treated as having the same `offsets`, if those
 /// are not None.
-TORCH_ARG(torch::Tensor, per_sample_weights) = Tensor();
+TORCH_ARG(torch::Tensor, per_sample_weights);
 /// If ``true``, `offsets` has one additional element, where the last element
 /// is equivalent to the size of `indices`. This matches the CSR format. Note:
 /// this option is currently only supported when ``mode="sum"``.

@@ -67,13 +67,13 @@ namespace functional {
 /// F::InstanceNormFuncOptions().running_mean(mean).running_var(variance).weight(weight).bias(bias).momentum(0.1).eps(1e-5));
 /// ```
 struct TORCH_API InstanceNormFuncOptions {
-TORCH_ARG(Tensor, running_mean) = Tensor();
+TORCH_ARG(Tensor, running_mean);

-TORCH_ARG(Tensor, running_var) = Tensor();
+TORCH_ARG(Tensor, running_var);

-TORCH_ARG(Tensor, weight) = Tensor();
+TORCH_ARG(Tensor, weight);

-TORCH_ARG(Tensor, bias) = Tensor();
+TORCH_ARG(Tensor, bias);

 TORCH_ARG(bool, use_input_stats) = true;

@@ -131,7 +131,7 @@ struct TORCH_API BCELossOptions {
 reduction_t;

 /// A manual rescaling weight given to the loss of each batch element.
-TORCH_ARG(Tensor, weight) = {};
+TORCH_ARG(Tensor, weight);
 /// Specifies the reduction to apply to the output.
 /// ``'none'`` | ``'mean'`` | ``'sum'``. Default: ``'mean'``
 TORCH_ARG(reduction_t, reduction) = torch::kMean;

@@ -207,7 +207,7 @@ struct TORCH_API MultiMarginLossOptions {
 /// A manual rescaling weight given to each
 /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is
 /// treated as if having all ones.
-TORCH_ARG(Tensor, weight) = Tensor();
+TORCH_ARG(Tensor, weight);
 /// Specifies the reduction to apply to the output:
 /// ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be
 /// applied,
@@ -365,7 +365,7 @@ struct TORCH_API MultiLabelSoftMarginLossOptions {
 /// A manual rescaling weight given to each
 /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is
 /// treated as if having all ones.
-TORCH_ARG(Tensor, weight) = Tensor();
+TORCH_ARG(Tensor, weight);

 /// Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'.
 /// 'none': no reduction will be applied, 'mean': the sum of the output will

@@ -697,7 +697,7 @@ struct TORCH_API NLLLossOptions {
 /// A manual rescaling weight given to each
 /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is
 /// treated as if having all ones.
-TORCH_ARG(Tensor, weight) = {};
+TORCH_ARG(Tensor, weight);
 /// Specifies a target value that is ignored
 /// and does not contribute to the input gradient.
 TORCH_ARG(int64_t, ignore_index) = -100;

@@ -735,7 +735,7 @@ struct TORCH_API CrossEntropyLossOptions {

 /// A manual rescaling weight given to each class. If given, has to be a
 /// Tensor of size C
-TORCH_ARG(Tensor, weight) = {};
+TORCH_ARG(Tensor, weight);
 /// Specifies a target value that is ignored
 /// and does not contribute to the input gradient.
 TORCH_ARG(int64_t, ignore_index) = -100;

@@ -774,12 +774,12 @@ struct TORCH_API BCEWithLogitsLossOptions {
 reduction_t;
 /// A manual rescaling weight given to the loss of each batch element.
 /// If given, has to be a Tensor of size `nbatch`.
-TORCH_ARG(Tensor, weight) = {};
+TORCH_ARG(Tensor, weight);
 /// Specifies the reduction to apply to the output. Default: Mean
 TORCH_ARG(reduction_t, reduction) = torch::kMean;
 /// A weight of positive examples.
 /// Must be a vector with length equal to the number of classes.
-TORCH_ARG(Tensor, pos_weight) = {};
+TORCH_ARG(Tensor, pos_weight);
 };

 namespace functional {

@@ -43,9 +43,9 @@ struct TORCH_API LayerNormFuncOptions {
 /// input shape from an expected input.
 TORCH_ARG(std::vector<int64_t>, normalized_shape);

-TORCH_ARG(Tensor, weight) = {};
+TORCH_ARG(Tensor, weight);

-TORCH_ARG(Tensor, bias) = {};
+TORCH_ARG(Tensor, bias);

 /// a value added to the denominator for numerical stability. ``Default:
 /// 1e-5``.

@@ -177,9 +177,9 @@ struct TORCH_API GroupNormFuncOptions {
 /// number of groups to separate the channels into
 TORCH_ARG(int64_t, num_groups);

-TORCH_ARG(Tensor, weight) = {};
+TORCH_ARG(Tensor, weight);

-TORCH_ARG(Tensor, bias) = {};
+TORCH_ARG(Tensor, bias);

 /// a value added to the denominator for numerical stability. Default: 1e-5
 TORCH_ARG(double, eps) = 1e-5;

@@ -456,7 +456,7 @@ struct FractionalMaxPoolOptions {
 using ExpandingArrayDouble = torch::ExpandingArray<D, double>;
 TORCH_ARG(std::optional<ExpandingArrayDouble>, output_ratio) = std::nullopt;

-TORCH_ARG(torch::Tensor, _random_samples) = Tensor();
+TORCH_ARG(torch::Tensor, _random_samples);
 };

 /// `FractionalMaxPoolOptions` specialized for the `FractionalMaxPool2d` module.
@@ -38,7 +38,7 @@ struct TORCH_API AdamParamState
 TORCH_ARG(int64_t, step) = 0;
 TORCH_ARG(torch::Tensor, exp_avg);
 TORCH_ARG(torch::Tensor, exp_avg_sq);
-TORCH_ARG(torch::Tensor, max_exp_avg_sq) = {};
+TORCH_ARG(torch::Tensor, max_exp_avg_sq);

 public:
 void serialize(torch::serialize::InputArchive& archive) override;

@@ -38,7 +38,7 @@ struct TORCH_API AdamWParamState
 TORCH_ARG(int64_t, step) = 0;
 TORCH_ARG(torch::Tensor, exp_avg);
 TORCH_ARG(torch::Tensor, exp_avg_sq);
-TORCH_ARG(torch::Tensor, max_exp_avg_sq) = {};
+TORCH_ARG(torch::Tensor, max_exp_avg_sq);

 public:
 void serialize(torch::serialize::InputArchive& archive) override;

@@ -39,9 +39,9 @@ struct TORCH_API LBFGSParamState
 TORCH_ARG(int64_t, n_iter) = 0;
 TORCH_ARG(double, t) = 0;
 TORCH_ARG(double, prev_loss) = 0;
-TORCH_ARG(Tensor, d) = {};
-TORCH_ARG(Tensor, H_diag) = {};
-TORCH_ARG(Tensor, prev_flat_grad) = {};
+TORCH_ARG(Tensor, d);
+TORCH_ARG(Tensor, H_diag);
+TORCH_ARG(Tensor, prev_flat_grad);
 TORCH_ARG(std::deque<Tensor>, old_dirs);
 TORCH_ARG(std::deque<Tensor>, old_stps);
 TORCH_ARG(std::deque<Tensor>, ro);

@@ -43,8 +43,8 @@ struct TORCH_API RMSpropParamState
 : public OptimizerCloneableParamState<RMSpropParamState> {
 TORCH_ARG(int64_t, step) = 0;
 TORCH_ARG(torch::Tensor, square_avg);
-TORCH_ARG(torch::Tensor, momentum_buffer) = {};
-TORCH_ARG(torch::Tensor, grad_avg) = {};
+TORCH_ARG(torch::Tensor, momentum_buffer);
+TORCH_ARG(torch::Tensor, grad_avg);

 public:
 void serialize(torch::serialize::InputArchive& archive) override;

@@ -122,7 +122,7 @@ struct GraphTask : std::enable_shared_from_this<GraphTask> {

 // Note: this field is not ready to be used until the proper
 // `thread_locals_.set_grad_mode()` call in the constructor.
-at::ThreadLocalState thread_locals_ = at::ThreadLocalState();
+at::ThreadLocalState thread_locals_;

 std::unordered_set<c10::Stream> leaf_streams;

@@ -36,7 +36,7 @@ class LambdaPostHook : public torch::autograd::FunctionPostHook {

 protected:
 std::function<variable_list(const variable_list&, const variable_list&)> fn_;
-compiled_fn_type compiled_fn_{};
+compiled_fn_type compiled_fn_;
 };

 } // namespace torch::autograd::utils
@@ -176,9 +176,9 @@ struct FlightRecorder {
 size_t max_entries_ = 0;
 size_t next_ = 0;
 size_t id_ = 0;
-std::map<size_t, std::shared_ptr<ProcessGroupStatus>> all_pg_status_ = {};
+std::map<size_t, std::shared_ptr<ProcessGroupStatus>> all_pg_status_;
 std::map<std::tuple<std::string, std::string>, std::vector<uint64_t>>
-pg_name_to_ranks_ = {};
+pg_name_to_ranks_;
 std::string comm_lib_version_;

 std::optional<size_t> record(

@@ -367,7 +367,7 @@ class NCCLComm {
 int rank_{};
 // Optional reason for communicator failure, provided by ProcessGroupNCCL for
 // better error messaging.
-std::optional<std::string> commFailureReason_{};
+std::optional<std::string> commFailureReason_;
 bool initialized_{false};
 // Whether this communicator is using nonblocking mode. Recorded during comm
 // creation or split. For safety, we give a default value of true (more

@@ -91,7 +91,7 @@ class TORCH_API ParamCommsDebugInfo : public c10::DebugInfoBase {
 std::vector<int64_t> outputSplitSizes_;
 int globalRankStart_{};
 int globalRankStride_{};
-std::vector<int64_t> groupRanks_{};
+std::vector<int64_t> groupRanks_;
 };

 #define RECORD_PARAM_COMMS( \

@@ -11,7 +11,6 @@
 #include <ATen/core/dispatch/Dispatcher.h>
 #include <c10/macros/Macros.h>

-#include <torch/csrc/distributed/c10d/Work.hpp>
 // *************************************************************************
 // PROCESS GROUP collective communication API IS BEING CHANGED BETWEEN
 // versions 1.7 and 1.8.

@@ -284,7 +284,7 @@ class AsyncAllreduceWork : public ProcessGroupGloo::AsyncWork {
 reduceOp(std::move(reduceOp)),
 tag(tag) {}

-std::vector<at::Tensor> inputs{};
+std::vector<at::Tensor> inputs;
 const ReduceOp reduceOp;
 const uint32_t tag;

@@ -399,7 +399,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
 inputs(inputs),
 tag(tag) {}

-std::vector<at::Tensor> inputs{};
+std::vector<at::Tensor> inputs;
 const uint32_t tag;

 // We share dimensionality about the sparse tensors before collecting

@@ -732,7 +732,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
 std::condition_variable workMetaListCV_;

 // Heartbeat of watchdog thread.
-std::atomic_uint64_t heartbeat_{};
+std::atomic_uint64_t heartbeat_;

 // Whether or not to propagate detected errors to all ranks in the same PG
 // through TCPStore.
@@ -1319,7 +1319,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
 int traceBufferSize_;

 // We gate the cudaEventCache so that we can roll it out gradually.
-std::atomic<bool> cudaEventCacheEnabled_{};
+std::atomic<bool> cudaEventCacheEnabled_;

 std::thread onCompletionHookThread_;

@@ -1327,7 +1327,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
 std::atomic<bool> terminateProcessGroup_;

 // Whether there are hooks pending to be fired
-std::atomic<bool> hasPendingHooks_{};
+std::atomic<bool> hasPendingHooks_;

 // This is the signal from watchdog threads to indicate whether the monitor
 // thread should dump. Making it static so that it is accessible from all the

@@ -1416,11 +1416,11 @@ class TORCH_API ProcessGroupNCCL : public Backend {
 // Whether or not to create start CUDAEvent and enable timing for start
 // and end events. Note that enableTiming_ is always true if desyncDebug_
 // is set to true.
-std::atomic<bool> enableTiming_{};
+std::atomic<bool> enableTiming_;

 // Flag to enable the print of hash value of input/output of collectives for
 // verification.
-std::atomic<bool> enableCollectiveHashDebug_{};
+std::atomic<bool> enableCollectiveHashDebug_;

 // Whether or not TORCH_NCCL_AVOID_RECORD_STREAMS was set
 bool avoidRecordStreams_ = false;

@@ -41,7 +41,7 @@ class TCPServer;
 class TCPClient;

 struct SocketAddress {
-std::string host{};
+std::string host;
 std::uint16_t port{};
 };

@@ -65,7 +65,7 @@ class BackgroundThread {

 private:
 std::atomic<bool> is_running_{false};
-std::thread daemonThread_{};
+std::thread daemonThread_;
 };

 std::unique_ptr<BackgroundThread> create_tcpstore_backend(

@@ -437,7 +437,7 @@ inline at::Tensor newLikeFlat(
 }
 at::DeviceGuard gpuGuard(device);
 std::vector<int64_t> sizes{static_cast<int64_t>(tensors[deviceIdx].size())};
-std::vector<int64_t> strides{static_cast<int64_t>(t.numel())};
+std::vector<int64_t> strides{t.numel()};
 sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end());
 strides.insert(strides.end(), t.strides().begin(), t.strides().end());
 return at::empty_strided(
@@ -62,7 +62,7 @@ class TORCH_API StoreCollectives : public ControlCollectives {
 int rank_;
 int worldSize_;

-c10::FastSet<std::string> seenKeys_{};
+c10::FastSet<std::string> seenKeys_;
 };

 } // namespace c10d

@@ -26,8 +26,8 @@

 namespace c10d {

-constexpr int kDefaultFirstBucketBytes = int(1024 * 1024);
-constexpr int kDefaultBucketBytesCap = int(25 * 1024 * 1024);
+constexpr int kDefaultFirstBucketBytes = 1024 * 1024;
+constexpr int kDefaultBucketBytesCap = 25 * 1024 * 1024;
 // Collect runtime stats once for every kDDPRuntimeLoggingSampleRate iterations.
 constexpr int kDDPRuntimeLoggingSampleRate = 100;

@@ -244,7 +244,7 @@ class RAIIC10IValueHandle {

 class MaybeOwningAtenTensorHandle {
 public:
-MaybeOwningAtenTensorHandle() : handle_(nullptr), raii_handle_() {}
+MaybeOwningAtenTensorHandle() : handle_(nullptr) {}
 // We skip copy constructor as MaybeOwningAtenTensorHandle might be RAII which
 // makes it undefined.
 MaybeOwningAtenTensorHandle(const MaybeOwningAtenTensorHandle& other) =

@@ -111,7 +111,7 @@ struct SchemaParser {
 L.expect(':');
 name = fmt::format("{}::{}", name, L.expect(TK_IDENT).text_view());
 }
-std::string overload_name = "";
+std::string overload_name;
 if (L.nextIf('.')) {
 overload_name = L.expect(TK_IDENT).text();
 }
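The `overload_name` change above drops an empty-string initializer that is equivalent to default construction, the pattern targeted by `readability-redundant-string-init`; a self-contained sketch (hypothetical function, not PyTorch code) follows.

#include <string>

std::string overload_suffix(bool has_overload) {
  // Flagged form: std::string name = "";  -- the empty-string literal repeats the default constructor.
  std::string name;
  if (has_overload) {
    name = "out";
  }
  return name;
}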
@@ -412,11 +412,7 @@ struct Token {

 struct Lexer {
 explicit Lexer(std::shared_ptr<Source> source)
-: source(std::move(source)),
-
-indent_stack(),
-next_tokens(),
-shared(sharedParserData()) {
+: source(std::move(source)), shared(sharedParserData()) {
 auto first_indent = lexRaw(true);
 indent_stack.push_back(first_indent.range.size());
 lex();

@@ -867,7 +867,7 @@ std::shared_ptr<SugaredValue> TorchCheckValue::call(
 }
 }

-if (args.size() >= 1) {
+if (!args.empty()) {
 if (found_cond_kwarg) {
 throw(
 ErrorReport(loc)

@@ -1769,7 +1769,7 @@ Node* Graph::createTupleSlice(

 int64_t i = beg;
 for ([[maybe_unused]] const auto j : c10::irange(num_values)) {
-auto idx = insertConstant(IValue(static_cast<int64_t>(i)));
+auto idx = insertConstant(IValue(i));
 auto tupleIndex = insertNode(createTupleIndex(tup, idx, tt->elements()[i]));

 new_vals.push_back(tupleIndex->output());
@@ -16,10 +16,6 @@
 #include <unordered_set>
 #include <vector>

-namespace c10 {
-TypePtr parseType(const std::string& pythonStr);
-} // namespace c10
-
 namespace torch::jit {

 using caffe2::serialize::FileAdapter;

@@ -67,8 +63,7 @@ std::vector<IValue> get_bytecode_ivalues(PyTorchStreamReader& reader) {
 /********************** Bytecode **********************/

 // Forward declare
-uint64_t _get_model_bytecode_version(
-const std::vector<IValue>& bytecode_ivalues);
 static uint64_t _get_model_bytecode_version_from_bytes(char* data, size_t size);

 uint64_t _get_model_bytecode_version(std::istream& in) {

@@ -250,8 +245,6 @@ std::unordered_map<std::string, OperatorInfo> _get_model_ops_and_info(
 /********************** Get Type Table **********************/

 // Forward declare
-std::unordered_set<std::string> _get_mobile_model_contained_types(
-const std::vector<IValue>& bytecode_ivalues);

 std::unordered_set<std::string> _get_mobile_model_contained_types(
 std::istream& in) {

@@ -93,7 +93,7 @@ enum ModelCompatibilityStatus {

 struct ModelCompatCheckResult {
 ModelCompatibilityStatus status;
-std::vector<std::string> errors{};
+std::vector<std::string> errors;
 };
 // Takes in information about a runtime and a model and returns if the two are
 // compatible with one another.

@@ -7,10 +7,6 @@
 #include <torch/custom_class.h>
 #include <unordered_map>

-namespace c10 {
-TypePtr parseType(const std::string& pythonStr);
-} // namespace c10
-
 namespace torch::jit {

 uint64_t _get_runtime_bytecode_version() {

@@ -121,13 +121,6 @@ TORCH_API mobile::Module parse_flatbuffer_no_object(
 size_t size,
 std::optional<at::Device> device);

-TORCH_API mobile::Module parse_and_initialize_mobile_module(
-void* data,
-size_t,
-std::optional<at::Device>,
-ExtraFilesMap* extra_files,
-bool should_copy_tensor_memory);
-
 // no op, TODO(qihan) delete
 TORCH_API bool register_flatbuffer_loader();

@@ -87,8 +87,6 @@ using caffe2::serialize::MemoryReadAdapter;
 using caffe2::serialize::PyTorchStreamReader;
 using caffe2::serialize::ReadAdapterInterface;

-OpCode parseOpCode(const char* str);
-
 TypePtr resolveTypeNameMobile(
 const c10::QualifiedName& qn,
 const std::shared_ptr<CompilationUnit>& compilation_unit) {

@@ -216,7 +214,7 @@ class BytecodeDeserializer final {
 mobile::Function* function);
 std::shared_ptr<CompilationUnit> compilation_unit_;
 std::unordered_set<std::string> imported_libs_;
-std::unique_ptr<PyTorchStreamReader> reader_{};
+std::unique_ptr<PyTorchStreamReader> reader_;
 std::optional<at::Device> device_;
 uint64_t module_load_options_;
 // From `version` or `.data/version` in model.ptl and it's compute

@@ -17,7 +17,7 @@
 #include <torch/csrc/jit/runtime/vararg_functions.h>

 namespace torch::jit {
-std::ostream& operator<<(std::ostream& out, Instruction inst);
 namespace mobile {
 InterpreterState::InterpreterState(const Code& code) {
 enterFrame(code);

@@ -9,7 +9,7 @@
 #include <torch/custom_class_detail.h>

 namespace torch::jit {
-OpCode parseOpCode(const char* str);
 using c10::IValue;

 IValue expect_field(

@@ -84,7 +84,7 @@ Tensor SGD::step(const LossClosure& closure) {
 loss = closure();
 }
 for (auto& group : param_groups_) {
-auto& options = static_cast<SGDOptions&>(group.options());
+auto& options = group.options();
 auto weight_decay = options.weight_decay();
 auto momentum = options.momentum();
 auto dampening = options.dampening();
@ -272,8 +272,7 @@ Operation createUnaryOp(
|
|||||||
TORCH_INTERNAL_ASSERT(
|
TORCH_INTERNAL_ASSERT(
|
||||||
a_it.get_desc().get_size() % elementSize(a.scalar_type()) == 0);
|
a_it.get_desc().get_size() % elementSize(a.scalar_type()) == 0);
|
||||||
|
|
||||||
auto out_aten = at::from_blob(
|
auto out_aten = at::from_blob(out_raw_data, nelem, a_options_with_strided);
|
||||||
out_raw_data, {static_cast<int64_t>(nelem)}, a_options_with_strided);
|
|
||||||
aten_op(out_aten, in_aten);
|
aten_op(out_aten, in_aten);
|
||||||
push(stack, out);
|
push(stack, out);
|
||||||
};
|
};
|
||||||
|
@ -28,9 +28,6 @@ TORCH_API ValueToParamPairMap
|
|||||||
buildValueToParamsMap(Block* b, const ParamMap& paramsDict);
|
buildValueToParamsMap(Block* b, const ParamMap& paramsDict);
|
||||||
TORCH_API void eraseUnusedValuesFromMap(ValueToParamPairMap& valsToParamsMap);
|
TORCH_API void eraseUnusedValuesFromMap(ValueToParamPairMap& valsToParamsMap);
|
||||||
TORCH_API void eraseUnusedBlockInputs(Block* b);
|
TORCH_API void eraseUnusedBlockInputs(Block* b);
|
||||||
TORCH_API void buildParamsMapFromValueToParamsMap(
|
|
||||||
const ValueToParamPairMap& valsToParamsMap,
|
|
||||||
ParamMap& paramsDict);
|
|
||||||
|
|
||||||
TORCH_API Node* addNodeToBlock(
|
TORCH_API Node* addNodeToBlock(
|
||||||
Block* block,
|
Block* block,
|
||||||
|
@ -1439,8 +1439,8 @@ void ComputeConstant(Node* n, int opset_version) {
|
|||||||
for (auto cur_dim : shape_vector_0) {
|
for (auto cur_dim : shape_vector_0) {
|
||||||
num_elements *= cur_dim.static_size();
|
num_elements *= cur_dim.static_size();
|
||||||
}
|
}
|
||||||
dims.emplace_back(c10::ShapeSymbol::fromStaticSize(
|
dims.emplace_back(
|
||||||
static_cast<int64_t>(num_elements)));
|
c10::ShapeSymbol::fromStaticSize(num_elements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -16,7 +16,7 @@ struct QuantFusionInfo {
 std::string quantized_op_name;
 std::string pattern;
 std::string replacement;
-std::vector<MatchFilter> filters = {};
+std::vector<MatchFilter> filters;
 };
 
 namespace {
@@ -16,7 +16,6 @@
 #include <torch/csrc/jit/passes/pass_manager.h>
 #include <torch/csrc/jit/passes/remove_redundant_profiles.h>
 #include <torch/csrc/jit/passes/symbolic_shape_runtime_fusion.h>
-#include <torch/csrc/jit/passes/tensorexpr_fuser.h>
 #include <torch/csrc/jit/passes/utils/subgraph_utils.h>
 #include <torch/csrc/jit/runtime/custom_operator.h>
 #include <torch/csrc/jit/runtime/graph_executor.h>
@@ -78,8 +78,7 @@ void flatten_rec(PyObject* obj, ParsedArgs& args) {
 args.desc.metadata.emplace_back(var);
 args.desc.structure.push_back(D::Bool);
 } else if (PyLong_Check(obj)) { // Wrap longs in Long tensors
-at::Tensor var = scalar_to_tensor(
-at::Scalar(static_cast<int64_t>(THPUtils_unpackLong(obj))));
+at::Tensor var = scalar_to_tensor(at::Scalar(THPUtils_unpackLong(obj)));
 args.vars.push_back(var);
 args.desc.metadata.emplace_back(var);
 args.desc.structure.push_back(D::Long);
@@ -95,6 +95,5 @@ std::ostream& operator<<(std::ostream& out, Instruction inst);
 bool isOpSupportedInMobile(OpCode op);
 char const* toString(OpCode op);
 OpCode parseOpCode(const char* str);
-std::ostream& operator<<(std::ostream& out, Instruction inst);
 
 } // namespace torch::jit
@@ -1710,7 +1710,7 @@ int64_t stringFindImpl(
 bool reverse = false) {
 int64_t size = string.size();
 if (start < 0) {
-start = std::max(int64_t(0), int64_t(size + start));
+start = std::max(int64_t(0), size + start);
 }
 if (end < 0) {
 end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -1964,7 +1964,7 @@ static const std::vector<OperatorGeneratorArgs> stringOpGenArgs{
 return;
 }
 if (start < 0) {
-start = std::max(int64_t(0), int64_t(size + start));
+start = std::max(int64_t(0), size + start);
 }
 if (end < 0) {
 end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -1993,7 +1993,7 @@ static const std::vector<OperatorGeneratorArgs> stringOpGenArgs{
 std::string string = pop(stack).toStringRef();
 int64_t size = string.size();
 if (start < 0) {
-start = std::max(int64_t(0), int64_t(size + start));
+start = std::max(int64_t(0), (size + start));
 }
 if (end < 0) {
 end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -2019,7 +2019,7 @@ static const std::vector<OperatorGeneratorArgs> stringOpGenArgs{
 std::string string = pop(stack).toStringRef();
 int64_t size = string.size();
 if (start < 0) {
-start = std::max(int64_t(0), int64_t(size + start));
+start = std::max(int64_t(0), (size + start));
 }
 if (end < 0) {
 end = std::max(int64_t(0), int64_t(size + end + 1));
@ -1098,7 +1098,7 @@ namespace {
|
|||||||
|
|
||||||
void destroyNodeOutputs(ProcessedNode& p_node) {
|
void destroyNodeOutputs(ProcessedNode& p_node) {
|
||||||
const auto borrows_outputs = borrowsOutputs(p_node.node()->kind());
|
const auto borrows_outputs = borrowsOutputs(p_node.node()->kind());
|
||||||
const auto num_outputs = static_cast<uint32_t>(p_node.num_outputs());
|
const auto num_outputs = p_node.num_outputs();
|
||||||
for (const auto i : c10::irange<uint32_t>(num_outputs)) {
|
for (const auto i : c10::irange<uint32_t>(num_outputs)) {
|
||||||
auto& output = p_node.Output(i);
|
auto& output = p_node.Output(i);
|
||||||
if (doesNotHeapAllocateWhenStoredInIValue(*output.type())) {
|
if (doesNotHeapAllocateWhenStoredInIValue(*output.type())) {
|
||||||
@ -1863,7 +1863,7 @@ bool BlockRunner::check_for_memory_leak(
|
|||||||
const auto num_nodes = static_cast<uint32_t>(nodes_.size());
|
const auto num_nodes = static_cast<uint32_t>(nodes_.size());
|
||||||
for (const auto n : c10::irange(num_nodes)) {
|
for (const auto n : c10::irange(num_nodes)) {
|
||||||
auto& pnode = nodes_[n];
|
auto& pnode = nodes_[n];
|
||||||
const auto num_outputs = static_cast<uint32_t>(pnode.num_outputs());
|
const auto num_outputs = pnode.num_outputs();
|
||||||
for (const auto i : c10::irange(num_outputs)) {
|
for (const auto i : c10::irange(num_outputs)) {
|
||||||
const IValue* ival = &pnode.Output(i);
|
const IValue* ival = &pnode.Output(i);
|
||||||
const Value* val = pnode.node()->output(i);
|
const Value* val = pnode.node()->output(i);
|
||||||
@ -1943,7 +1943,7 @@ bool BlockRunner::checkOutputTensorMemoryLeaks() {
|
|||||||
const auto num_nodes = static_cast<uint32_t>(nodes_.size());
|
const auto num_nodes = static_cast<uint32_t>(nodes_.size());
|
||||||
for (const auto n : c10::irange(num_nodes)) {
|
for (const auto n : c10::irange(num_nodes)) {
|
||||||
auto& pnode = nodes_[n];
|
auto& pnode = nodes_[n];
|
||||||
const auto num_outputs = static_cast<uint32_t>(pnode.num_outputs());
|
const auto num_outputs = pnode.num_outputs();
|
||||||
for (const auto i : c10::irange(num_outputs)) {
|
for (const auto i : c10::irange(num_outputs)) {
|
||||||
const IValue* ival = &pnode.Output(i);
|
const IValue* ival = &pnode.Output(i);
|
||||||
const Value* val = pnode.node()->output(i);
|
const Value* val = pnode.node()->output(i);
|
||||||
@ -2042,7 +2042,7 @@ ProcessedFunction::ProcessedFunction(
|
|||||||
stack.emplace_back(static_cast<int>(size));
|
stack.emplace_back(static_cast<int>(size));
|
||||||
}
|
}
|
||||||
node_op(stack);
|
node_op(stack);
|
||||||
const auto num_outputs = static_cast<uint32_t>(pnode->num_outputs());
|
const auto num_outputs = pnode->num_outputs();
|
||||||
TORCH_DCHECK_EQ(stack.size(), num_outputs);
|
TORCH_DCHECK_EQ(stack.size(), num_outputs);
|
||||||
for (const auto i : c10::irange(num_outputs)) {
|
for (const auto i : c10::irange(num_outputs)) {
|
||||||
pnode->Output(i) = std::move(stack[i]);
|
pnode->Output(i) = std::move(stack[i]);
|
||||||
@ -2158,7 +2158,7 @@ bool ProcessedNode::verify_no_memory_overlap(bool force_check) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ProcessedNode::verify_outputs_dont_overlap_each_other() const {
|
bool ProcessedNode::verify_outputs_dont_overlap_each_other() const {
|
||||||
const auto n_outputs = static_cast<uint32_t>(num_outputs());
|
const auto n_outputs = num_outputs();
|
||||||
for (const auto i : c10::irange(n_outputs)) {
|
for (const auto i : c10::irange(n_outputs)) {
|
||||||
if (!Output(i).isTensor()) {
|
if (!Output(i).isTensor()) {
|
||||||
continue;
|
continue;
|
||||||
@ -2196,7 +2196,7 @@ bool ProcessedNode::verify_inputs_dont_overlap_outputs(bool force_check) const {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
const auto n_inputs = static_cast<uint32_t>(inputs_.size());
|
const auto n_inputs = static_cast<uint32_t>(inputs_.size());
|
||||||
const auto n_outputs = static_cast<uint32_t>(num_outputs());
|
const auto n_outputs = num_outputs();
|
||||||
for (const auto i : c10::irange<uint32_t>(n_inputs)) {
|
for (const auto i : c10::irange<uint32_t>(n_inputs)) {
|
||||||
const IValue* in = &Input(i);
|
const IValue* in = &Input(i);
|
||||||
if (!in->isTensor()) {
|
if (!in->isTensor()) {
|
||||||
@ -2235,7 +2235,7 @@ bool ProcessedNode::check_and_correct_overlap_with(
|
|||||||
|
|
||||||
void ProcessedNode::verify_and_correct_memory_overlap() {
|
void ProcessedNode::verify_and_correct_memory_overlap() {
|
||||||
const auto n_inputs = static_cast<uint32_t>(inputs_.size());
|
const auto n_inputs = static_cast<uint32_t>(inputs_.size());
|
||||||
const auto n_outputs = static_cast<uint32_t>(num_outputs());
|
const auto n_outputs = num_outputs();
|
||||||
for (const auto i : c10::irange(n_inputs)) {
|
for (const auto i : c10::irange(n_inputs)) {
|
||||||
const IValue& in = Input(i);
|
const IValue& in = Input(i);
|
||||||
if (!in.isTensor()) {
|
if (!in.isTensor()) {
|
||||||
|
@@ -142,9 +142,9 @@ class TORCH_API ManagedTensorRanges {
 
 // Maps Node* to the set of managed tensors that are now available
 // for reuse after this node.
-c10::FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_{};
+c10::FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_;
 // Maps each Value* to its lifetime (start node index, end node index)
-c10::FastMap<const Value*, Lifetime> value_lifetimes_{};
+c10::FastMap<const Value*, Lifetime> value_lifetimes_;
 };
 
 struct TORCH_API StaticModuleOptions {
@@ -395,7 +395,7 @@ class BlockInfo {
 c10::FastSet<const Value*> managed_output_tensor_values_;
 c10::FastSet<const Value*> leaked_values_;
 
-ManagedTensorRanges managed_tensor_ranges_{};
+ManagedTensorRanges managed_tensor_ranges_;
 
 // The index of this block's inputs in the shared values_ array.
 const uint16_t input_idx_;
@@ -549,7 +549,7 @@ class TORCH_API StaticModule {
 // IValue table (defined by prim::Constant nodes)
 std::vector<IValue> constants_;
 // The functions to be called by corresponding ProcessedNode.
-std::vector<ProcessedFunction> functions_{};
+std::vector<ProcessedFunction> functions_;
 // A list of pre-processed nodes from which ProcessedNode are created per
 // StaticRuntime instance.
 std::vector<StaticNodeInfo> nodes_;
@@ -35,7 +35,7 @@ class StorageGroup {
 // allocated for all tensors in this storage group. Initially it
 // is zero, eventually it gets updated by the MemoryPlanner.
 size_t max_tensor_size_ = 0;
-std::vector<at::Tensor*> group_{};
+std::vector<at::Tensor*> group_;
 };
 
 // A contiguous buffer of `StorageImpl`s
@@ -263,7 +263,7 @@ class MemoryPlanner {
 // to an ordinary "strong reference" state.
 std::vector<IValue*> borrowed_ivalues_needing_incref_;
 
-std::vector<std::pair<size_t, at::Tensor*>> managed_output_tensors_{};
+std::vector<std::pair<size_t, at::Tensor*>> managed_output_tensors_;
 at::DataPtr buffer_; // allocated each time we call Run()
 uint8_t* buffer_start_{nullptr};
 uint8_t* buffer_end_{nullptr};
@@ -292,7 +292,7 @@ class StandardMemoryPlanner : public MemoryPlanner {
 void allocateManagedTensors() override;
 void deallocateManagedTensors() override;
 
-std::vector<StorageGroup> managed_tensors_{};
+std::vector<StorageGroup> managed_tensors_;
 };
 
 } // namespace torch::jit
@@ -12,7 +12,6 @@
 #include <torch/csrc/jit/passes/dead_code_elimination.h>
 #include <torch/csrc/jit/passes/inliner.h>
 #include <torch/csrc/jit/runtime/instruction.h>
-#include <torch/csrc/jit/serialization/export.h>
 #include <torch/csrc/jit/serialization/import_export_constants.h>
 #include <torch/csrc/jit/serialization/import_export_functions.h>
 #include <torch/csrc/jit/serialization/import_export_helpers.h>
@@ -661,10 +661,10 @@ void ScriptModuleSerializer::writeByteCode(
 BackendDebugInfoRecorder debug_info_recorder;
 int64_t version_to_write = caffe2::serialize::kProducedBytecodeVersion;
 
-elements.emplace_back(static_cast<int64_t>(version_to_write));
+elements.emplace_back(version_to_write);
 std::vector<c10::IValue> debug_info_elements;
 // Always save debug handles
-debug_info_elements.emplace_back(static_cast<int64_t>(version_to_write));
+debug_info_elements.emplace_back(version_to_write);
 
 mobile::Module mobile_module =
 jitModuleToMobile(module, getOptionsFromGlobal());
@@ -913,7 +913,7 @@ void save_jit_module_to_write_func(
 const std::function<size_t(const void*, size_t)>& writer_func) {
 (void)save_mobile_debug_info;
 auto buffer = save_jit_module_to_bytes(module, extra_files);
-writer_func(reinterpret_cast<void*>(buffer->data()), buffer->size());
+writer_func(buffer->data(), buffer->size());
 }
 
 void ExportModule(
@@ -140,12 +140,6 @@ TORCH_API Module load_jit_module_from_stream(
 ExtraFilesMap& extra_files,
 std::optional<at::Device> device = std::nullopt);
 
-TORCH_API Module parse_and_initialize_jit_module(
-const std::shared_ptr<char>& data,
-size_t size,
-ExtraFilesMap& extra_files,
-std::optional<at::Device> device);
-
 TORCH_API c10::intrusive_ptr<c10::ivalue::Object> ObjLoaderFunc(
 const at::StrongTypePtr& type,
 IValue input);
@@ -53,7 +53,7 @@ enum class PickleOpCode : char {
 BINFLOAT = 'G',
 
 // Protocol 2
-PROTO = char('\x80'),
+PROTO = '\x80',
 NEWOBJ = '\x81',
 EXT1 = '\x82',
 EXT2 = '\x83',
@@ -71,7 +71,7 @@ enum class PickleOpCode : char {
 SHORT_BINBYTES = 'C',
 
 // Protocol 4
-SHORT_BINUNICODE = char('\x8c'),
+SHORT_BINUNICODE = '\x8c',
 BINUNICODE8 = '\x8d',
 BINBYTES8 = '\x8e',
 EMPTY_SET = '\x8f',
@@ -167,9 +167,7 @@ std::vector<char> SourceRangePickler::pickle(
 }
 
 ivalues.emplace_back(c10::ivalue::Tuple::create(
-{(int64_t)range.bytes,
-srs->serialize(range.range),
-static_cast<int64_t>(source_range_tag)}));
+{(int64_t)range.bytes, srs->serialize(range.range), source_range_tag}));
 }
 
 std::vector<at::Tensor> table;
@@ -1063,10 +1063,10 @@ void Unpickler::rebuildRRef() {
 // const reference will extend the lifetime of the temporary variable
 const auto& rrefId = distributed::rpc::RRefId(
 static_cast<int16_t>(args.at(distributed::rpc::RREFID_ON_IDX).toInt()),
-static_cast<int64_t>(args.at(distributed::rpc::RREFID_ID_IDX).toInt()));
+args.at(distributed::rpc::RREFID_ID_IDX).toInt());
 const auto& forkId = distributed::rpc::RRefId(
 static_cast<int16_t>(args.at(distributed::rpc::FORKID_ON_IDX).toInt()),
-static_cast<int64_t>(args.at(distributed::rpc::FORKID_ID_IDX).toInt()));
+args.at(distributed::rpc::FORKID_ID_IDX).toInt());
 auto parent =
 static_cast<int16_t>(args.at(distributed::rpc::PARENT_IDX).toInt());
 const auto& typeStr = static_cast<std::string>(
@@ -1082,8 +1082,7 @@ void CudaCodeGen::call_with_numel(void** args, int64_t numel) {
 // https://stackoverflow.com/questions/34388712/cannot-understand-how-jcuda-culaunchkernel-work
 std::vector<void*> ptr_to_args(buffer_args.size());
 for (size_t i = 0; i < buffer_args.size(); i++) {
-ptr_to_args[i] =
-buffer_args[i].isVar() ? args[i] : const_cast<void**>(&args[i]);
+ptr_to_args[i] = buffer_args[i].isVar() ? args[i] : (&args[i]);
 }
 
 const auto device = this->device().index();
@@ -127,7 +127,7 @@ To raw_bitcast(const From& src) {
 TORCH_CHECK(sizeof(To) == sizeof(From), "Invalid bitcast invocation");
 To storage;
 std::memcpy(&storage, &src, sizeof(To));
-return reinterpret_cast<To&>(storage);
+return storage;
 }
 
 class SimpleIREvaluatorImpl;
@@ -1482,7 +1482,7 @@ std::vector<BufPtr> TensorExprKernel::preAllocIntermediateBufs(
 remaining_interm_bufs.push_back(buf);
 continue;
 }
-auto bp = (void*)malloc(size);
+auto bp = malloc(size);
 if (!bp) {
 remaining_interm_bufs.push_back(buf);
 continue;
@@ -369,7 +369,7 @@ void loopnestRandomization(int64_t seed, LoopNest& l) {
 
 // Find a random number of loops to fuse
 int num_loops_to_fuse =
-std::max(2, (int)(std::rand() % (int)loops.size()));
+std::max(2, (std::rand() % (int)loops.size()));
 
 auto [loops_to_fuse, chosen_indices] =
 randomization_helper::select_n_randomly<ForPtr>(
@@ -42,13 +42,6 @@ TORCH_API Tensor computeQuantizedConv2dPrepack(
 const std::optional<ScalarType>& outputType,
 at::Device device);
 
-TORCH_API Tensor computeQuantizedConv1d(
-const std::vector<ArgValue>& inputs,
-const std::vector<ExprHandle>& outputShape,
-const std::vector<ExprHandle>& outputStrides,
-const std::optional<ScalarType>& outputType,
-at::Device device);
-
 TORCH_API Tensor computeQuantizedConv2d(
 const std::vector<ArgValue>& inputs,
 const std::vector<ExprHandle>& outputShape,
@@ -369,7 +369,7 @@ struct ExtraFields<EventType::Kineto> {
 uint64_t correlation_id_{0};
 libkineto::ActivityType activity_type_;
 Flow flow;
-std::weak_ptr<Result> linked_activity_{};
+std::weak_ptr<Result> linked_activity_;
 std::string metadata_json_;
 };
 
@@ -57,7 +57,7 @@ struct FDE {
 throw UnwindError("unsupported 'eh' augmentation string");
 }
 code_alignment_factor_ = static_cast<int64_t>(LC.readULEB128());
-data_alignment_factor_ = static_cast<int64_t>(LC.readSLEB128());
+data_alignment_factor_ = LC.readSLEB128();
 if (version == 1) {
 ra_register_ = LC.read<uint8_t>();
 } else {