Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[structural binding][5/N] Replace std::tie with structural binding (#120142)
This PR follows https://github.com/pytorch/pytorch/pull/119774, continuing the work of replacing std::tie with structured bindings.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/120142
Approved by: https://github.com/albanD
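The change in every hunk below is the same mechanical rewrite: where the old code declared the output variables up front and then assigned them through std::tie, the new code uses a single C++17 structured binding. The sketch below illustrates the before/after shape with a hypothetical split() helper (not a function from this PR) standing in for the tuple-returning calls touched here, such as convert_to_float<scalar_t>() or params->unpack().

#include <tuple>

// Hypothetical helper that returns two related values as a tuple, standing in
// for the tuple-returning calls touched by this PR.
std::tuple<float, float> split(float x) {
  return {x * 0.5f, x * 2.0f};
}

float before(float x) {
  // Old style: default-construct first, then assign through std::tie.
  float lo, hi;
  std::tie(lo, hi) = split(x);
  return lo + hi;
}

float after(float x) {
  // New style: a structured binding declares and initializes both names
  // directly from the returned tuple in one statement.
  auto [lo, hi] = split(x);
  return lo + hi;
}

Besides being shorter, the structured-binding form removes the separate default-constructed declarations, which is why several hunks below also drop the NOLINTNEXTLINE(cppcoreguidelines-init-variables) suppressions that accompanied them.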
@@ -36,8 +36,7 @@ static void log_sigmoid_cpu_kernel(TensorBase &output, TensorBase &buffer, const
 int64_t d = 0;
 for (; d < size - (size % Vec::size()); d += Vec::size()) {
 Vec data_vec = Vec::loadu(input_data + begin+ d);
-Vectorized<float> data_vec0, data_vec1;
-std::tie(data_vec0, data_vec1) = convert_to_float<scalar_t>(data_vec);
+auto [data_vec0, data_vec1] = convert_to_float<scalar_t>(data_vec);
 Vectorized<float> min_vec = minimum(data_vec0, Vectorized<float>(float(0)));
 Vectorized<float> buffer_vec0 = data_vec0.abs().neg().exp();
 Vectorized<float> output_vec0 = min_vec - buffer_vec0.log1p();
@@ -49,8 +48,7 @@ static void log_sigmoid_cpu_kernel(TensorBase &output, TensorBase &buffer, const
 }
 if (size - d > 0) {
 Vec data_vec = Vec::loadu(input_data + begin + d, size - d);
-Vectorized<float> data_vec0, data_vec1;
-std::tie(data_vec0, data_vec1) = convert_to_float<scalar_t>(data_vec);
+auto [data_vec0, data_vec1] = convert_to_float<scalar_t>(data_vec);
 Vectorized<float> min_vec = minimum(data_vec0, Vectorized<float>(float(0)));
 Vectorized<float> buffer_vec0 = data_vec0.abs().neg().exp();
 Vectorized<float> output_vec0 = min_vec - buffer_vec0.log1p();
@@ -108,10 +106,9 @@ static void log_sigmoid_backward_cpu_kernel(TensorIterator& iter) {
 return (max_deriv - sign * (float(b) / (float(1) + b))) * float(c);
 },
 [=](Vec a, Vec b, Vec c) -> Vec {
-Vectorized<float> a0, a1, b0, b1, c0, c1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
-std::tie(b0, b1) = convert_to_float<scalar_t>(b);
-std::tie(c0, c1) = convert_to_float<scalar_t>(c);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
+auto [b0, b1] = convert_to_float<scalar_t>(b);
+auto [c0, c1] = convert_to_float<scalar_t>(c);
 auto mask = a0 < zero_vec;
 auto max_deriv_vec = Vectorized<float>::blendv(zero_vec, one_vec, mask);
 auto sign_vec = Vectorized<float>::blendv(one_vec.neg(), one_vec, mask);
@@ -164,9 +161,8 @@ static void threshold_kernel(
 return float(x) <= threshold ? value : other;
 },
 [&](Vectorized<scalar_t> x, Vectorized<scalar_t> other) -> Vectorized<scalar_t> {
-Vec x0, x1, other0, other1;
-std::tie(x0, x1) = convert_to_float<scalar_t>(x);
-std::tie(other0, other1) = convert_to_float<scalar_t>(other);
+auto [x0, x1] = convert_to_float<scalar_t>(x);
+auto [other0, other1] = convert_to_float<scalar_t>(other);
 return convert_from_float<scalar_t>(Vec::blendv(other0, value_v, x0 <= threshold_v),
 Vec::blendv(other1, value_v, x1 <= threshold_v));
 });
@@ -207,16 +203,15 @@ void elu_kernel(TensorIteratorBase& it, const Scalar& alpha, const Scalar& scale
 return float(a) <= float(0) ? (std::exp(float(a) * negiptcoef) - float(1)) * negcoef : float(a) * poscoef;
 },
 [&negcoef_vec, &negiptcoef_vec, &poscoef_vec, &one_vec, &zero_vec](Vectorized<scalar_t> a) -> Vectorized<scalar_t> {
-Vectorized<float> a0, a1, res0, res1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
 auto cmp0 = (a0 > zero_vec);
 auto cmp1 = (a1 > zero_vec);
 auto get_res_masked = [&](Vectorized<float>& cmp, Vectorized<float>& a) {
 return !cmp.zero_mask() ? a * poscoef_vec :
 Vectorized<float>::blendv(((a * negiptcoef_vec).exp() - one_vec) * negcoef_vec, a * poscoef_vec, cmp);
 };
-res0 = get_res_masked(cmp0, a0);
-res1 = get_res_masked(cmp1, a1);
+auto res0 = get_res_masked(cmp0, a0);
+auto res1 = get_res_masked(cmp1, a1);
 return convert_from_float<scalar_t>(res0, res1);
 });
 });
@@ -268,10 +263,8 @@ void elu_backward_kernel(TensorIteratorBase& it, const Scalar& alpha, const Scal
 }
 },
 [&negcoef_vec, &negiptcoef_vec, &poscoef_vec, &zero_vec, is_result](Vectorized<scalar_t> a, Vectorized<scalar_t> b) -> Vectorized<scalar_t> {
-Vectorized<float> a0, a1, res0, res1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
-Vectorized<float> b0, b1;
-std::tie(b0, b1) = convert_to_float<scalar_t>(b);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
+auto [b0, b1] = convert_to_float<scalar_t>(b);
 auto cmp0 = (b0 > zero_vec);
 auto cmp1 = (b1 > zero_vec);
 auto get_res_masked = [&](Vectorized<float>& cmp, Vectorized<float>& a, Vectorized<float>& b) {
@@ -282,8 +275,8 @@ void elu_backward_kernel(TensorIteratorBase& it, const Scalar& alpha, const Scal
 return Vectorized<float>::blendv(a * negiptcoef_vec * negcoef_vec * (b * negiptcoef_vec).exp(), a * poscoef_vec, cmp);
 }
 };
-res0 = get_res_masked(cmp0, a0, b0);
-res1 = get_res_masked(cmp1, a1, b1);
+auto res0 = get_res_masked(cmp0, a0, b0);
+auto res1 = get_res_masked(cmp1, a1, b1);
 return convert_from_float<scalar_t>(res0, res1);
 });
 });
@@ -364,8 +357,7 @@ void GeluKernelImpl(TensorIteratorBase& it, GeluType approximate) {
 return float(0.5) * float(x) * (float(1) + std::tanh(inner));
 },
 [&](Vectorized<scalar_t> x) -> Vectorized<scalar_t> {
-Vectorized<float> x0, x1;
-std::tie(x0, x1) = convert_to_float<scalar_t>(x);
+auto [x0, x1] = convert_to_float<scalar_t>(x);
 auto x0_cube = x0 * x0 * x0;
 auto x1_cube = x1 * x1 * x1;
 auto inner_vec0 = kBetaVec * (x0 + kKappaVec * x0_cube);
@@ -414,8 +406,7 @@ void GeluKernelImpl(TensorIteratorBase& it, GeluType approximate) {
 return float(x) * float(0.5) * (float(1) + std::erf(float(x) * kAlpha));
 },
 [&](Vectorized<scalar_t> x) -> Vectorized<scalar_t> {
-Vectorized<float> x0, x1;
-std::tie(x0, x1) = convert_to_float<scalar_t>(x);
+auto [x0, x1] = convert_to_float<scalar_t>(x);
 auto res0 = x0 * kPointFiveVec * (kOneVec + (x0 * kAlphaVec).erf());
 auto res1 = x1 * kPointFiveVec * (kOneVec + (x1 * kAlphaVec).erf());
 return convert_from_float<scalar_t>(res0, res1);
@@ -477,10 +468,8 @@ void GeluBackwardKernelImpl(TensorIteratorBase& it, GeluType approximate) {
 return float(dy) * (left_derivative + right_derivative);
 },
 [&](Vectorized<scalar_t> dy_vec, Vectorized<scalar_t> x_vec) -> Vectorized<scalar_t> {
-Vectorized<float> x0_vec, x1_vec;
-std::tie(x0_vec, x1_vec) = convert_to_float<scalar_t>(x_vec);
-Vectorized<float> dy0_vec, dy1_vec;
-std::tie(dy0_vec, dy1_vec) = convert_to_float<scalar_t>(dy_vec);
+auto [x0_vec, x1_vec] = convert_to_float<scalar_t>(x_vec);
+auto [dy0_vec, dy1_vec] = convert_to_float<scalar_t>(dy_vec);
 auto x0_sq = x0_vec * x0_vec;
 auto x1_sq = x1_vec * x1_vec;
 auto x0_cube = x0_vec * x0_vec * x0_vec;
@@ -583,10 +572,8 @@ void GeluBackwardKernelImpl(TensorIteratorBase& it, GeluType approximate) {
 return float(dy) * (cdf + float(x) * pdf);
 },
 [&](Vectorized<scalar_t> dy, Vectorized<scalar_t> x) -> Vectorized<scalar_t> {
-Vectorized<float> x0, x1;
-std::tie(x0, x1) = convert_to_float<scalar_t>(x);
-Vectorized<float> dy0, dy1;
-std::tie(dy0, dy1) = convert_to_float<scalar_t>(dy);
+auto [x0, x1] = convert_to_float<scalar_t>(x);
+auto [dy0, dy1] = convert_to_float<scalar_t>(dy);
 auto cdf_vec0 = kPointFiveVec * (kOneVec + (x0 * kAlphaVec).erf());
 auto cdf_vec1 = kPointFiveVec * (kOneVec + (x1 * kAlphaVec).erf());
 auto pdf_vec0 = kBetaVec * (x0 * x0 * kMinusPointFiveVec).exp();
@@ -643,8 +630,7 @@ void hardsigmoid_kernel(TensorIteratorBase& iter) {
 return std::min(std::max(float(self_val) + three, zero), six) / six;
 },
 [&](vec::Vectorized<scalar_t> self_val) -> vec::Vectorized<scalar_t> {
-Vectorized<float> self_val0, self_val1;
-std::tie(self_val0, self_val1) = convert_to_float<scalar_t>(self_val);
+auto [self_val0, self_val1] = convert_to_float<scalar_t>(self_val);
 self_val0 = minimum(
 maximum(self_val0 + kThreeVec, kZeroVec),
 kSixVec
@@ -698,9 +684,8 @@ void hardsigmoid_backward_kernel(TensorIteratorBase& iter) {
 : zero;
 },
 [=](Vectorized<scalar_t> grad_val, Vectorized<scalar_t> self_val) -> Vectorized<scalar_t> {
-Vec self_val0, self_val1, grad_val0, grad_val1;
-std::tie(self_val0, self_val1) = convert_to_float<scalar_t>(self_val);
-std::tie(grad_val0, grad_val1) = convert_to_float<scalar_t>(grad_val);
+auto [self_val0, self_val1] = convert_to_float<scalar_t>(self_val);
+auto [grad_val0, grad_val1] = convert_to_float<scalar_t>(grad_val);
 Vec gradNonZeroMask = (self_val0 > neg_three) & (self_val0 < three);
 self_val0 = Vec::blendv(kZeroVec, grad_val0 * kOneSixthVec, gradNonZeroMask);
 gradNonZeroMask = (self_val1 > neg_three) & (self_val1 < three);
@@ -759,11 +744,9 @@ void softshrink_kernel(TensorIteratorBase& iter, const Scalar& lambd) {
 return float(a) > lambd_val ? a - lambd_val : (float(a) < -lambd_val ? a + lambd_val : float(0));
 },
 [=](Vectorized<scalar_t> self_val) -> Vectorized<scalar_t> {
-Vectorized<float> self_val0, self_val1;
-Vectorized<scalar_t> self_val_t0, self_val_t1;
-std::tie(self_val0, self_val1) = convert_to_float<scalar_t>(self_val);
-self_val_t0 = convert_from_float<scalar_t>((self_val0 > lambdVec) & (self_val0 - lambdVec), (self_val1 > lambdVec) & (self_val1 - lambdVec));
-self_val_t1 = convert_from_float<scalar_t>((self_val0 < -lambd_val) & (self_val0 + lambdVec), (self_val1 < -lambd_val) & (self_val1 + lambdVec));
+auto [self_val0, self_val1] = convert_to_float<scalar_t>(self_val);
+auto self_val_t0 = convert_from_float<scalar_t>((self_val0 > lambdVec) & (self_val0 - lambdVec), (self_val1 > lambdVec) & (self_val1 - lambdVec));
+auto self_val_t1 = convert_from_float<scalar_t>((self_val0 < -lambd_val) & (self_val0 + lambdVec), (self_val1 < -lambd_val) & (self_val1 + lambdVec));
 return (self_val_t0 | self_val_t1);
 });
 });
@@ -812,9 +795,8 @@ void hardtanh_backward_kernel(TensorIterator& iter, const Scalar& min, const Sca
 return (float(self_val) <= min_val || float(self_val) >= max_val) ? scalar_t(0) : grad_val;
 },
 [=](Vectorized<scalar_t> grad_val, Vectorized<scalar_t> self_val) -> Vectorized<scalar_t> {
-Vectorized<float> grad_val0, grad_val1, self_val0, self_val1;
-std::tie(grad_val0, grad_val1) = convert_to_float<scalar_t>(grad_val);
-std::tie(self_val0, self_val1) = convert_to_float<scalar_t>(self_val);
+auto [grad_val0, grad_val1] = convert_to_float<scalar_t>(grad_val);
+auto [self_val0, self_val1] = convert_to_float<scalar_t>(self_val);
 return convert_from_float<scalar_t>(
 ((self_val0 > min_val) & (self_val0 < max_val)) & grad_val0,
 ((self_val1 > min_val) & (self_val1 < max_val)) & grad_val1
@@ -853,8 +835,7 @@ void hardswish_kernel(TensorIterator& iter) {
 return float(x) * std::min(std::max(float(x) + three, zero), six) / six;
 },
 [&](vec::Vectorized<scalar_t> x_vec) {
-Vectorized<float> x_vec0, x_vec1;
-std::tie(x_vec0, x_vec1) = convert_to_float<scalar_t>(x_vec);
+auto [x_vec0, x_vec1] = convert_to_float<scalar_t>(x_vec);
 x_vec0 = x_vec0 * minimum(
 maximum(x_vec0 + kThreeVec, kZeroVec),
 kSixVec
@@ -915,9 +896,8 @@ void hardswish_backward_kernel(TensorIterator& iter) {
 }
 },
 [&](vec::Vectorized<scalar_t> grad_val, vec::Vectorized<scalar_t> self_val) {
-Vectorized<float> self_val0, self_val1, grad_val0, grad_val1;
-std::tie(self_val0, self_val1) = convert_to_float<scalar_t>(self_val);
-std::tie(grad_val0, grad_val1) = convert_to_float<scalar_t>(grad_val);
+auto [self_val0, self_val1] = convert_to_float<scalar_t>(self_val);
+auto [grad_val0, grad_val1] = convert_to_float<scalar_t>(grad_val);
 self_val0 = Vec::blendv(
 Vec::blendv(
 grad_val0 * ((self_val0 / kThreeVec) + kOneHalfVec),
@@ -990,8 +970,7 @@ static void leaky_relu_kernel(TensorIteratorBase& iter, const Scalar& negval_) {
 return float(a) > float(0) ? float(a) : float(a) * negval;
 },
 [&](Vectorized<scalar_t> a) -> Vectorized<scalar_t> {
-Vectorized<float> a0, a1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
 auto res0 = a0 * (Vectorized<float>::blendv(negval_v, one_vec, a0 > zero_vec));
 auto res1 = a1 * (Vectorized<float>::blendv(negval_v, one_vec, a1 > zero_vec));
 return convert_from_float<scalar_t>(res0, res1);
@@ -1030,9 +1009,8 @@ static void leaky_relu_backward_kernel(TensorIteratorBase& iter, const Scalar& n
 return float(a) > float(0) ? float(b) : float(b) * negval;
 },
 [&](Vectorized<scalar_t> a, Vectorized<scalar_t> b) -> Vectorized<scalar_t> {
-Vectorized<float> a0, a1, b0, b1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
-std::tie(b0, b1) = convert_to_float<scalar_t>(b);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
+auto [b0, b1] = convert_to_float<scalar_t>(b);
 auto res0 = b0 * (Vectorized<float>::blendv(negval_v, one_vec, a0 > zero_vec));
 auto res1 = b1 * (Vectorized<float>::blendv(negval_v, one_vec, a1 > zero_vec));
 return convert_from_float<scalar_t>(res0, res1);
@@ -1073,8 +1051,7 @@ void softplus_kernel(TensorIteratorBase& iter, const Scalar& beta_, const Scalar
 : static_cast<scalar_t>((std::log1p(std::exp(float(a) * beta))) / beta);
 },
 [beta_vec, threshold_vec](Vectorized<scalar_t> a) -> Vectorized<scalar_t> {
-Vectorized<float> a0, a1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
 a0 = Vec::blendv((a0 * beta_vec).exp().log1p() / beta_vec, a0, (a0 * beta_vec) > threshold_vec);
 a1 = Vec::blendv((a1 * beta_vec).exp().log1p() / beta_vec, a1, (a1 * beta_vec) > threshold_vec);
 return convert_from_float<scalar_t>(a0, a1);
@@ -1118,9 +1095,8 @@ void softplus_backward_kernel(TensorIteratorBase& iter, const Scalar& beta_, con
 return (float(b) * beta) > threshold ? a : static_cast<scalar_t>(float(a) * z / (z + float(1.)));
 },
 [beta_vec, one_vec, threshold_vec](Vectorized<scalar_t> a, Vectorized<scalar_t> b) -> Vectorized<scalar_t> {
-Vectorized<float> a0, a1, b0, b1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
-std::tie(b0, b1) = convert_to_float<scalar_t>(b);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
+auto [b0, b1] = convert_to_float<scalar_t>(b);
 Vec z = (b0 * beta_vec).exp();
 a0 = Vec::blendv(a0 * z / (z + one_vec), a0, (b0 * beta_vec) > threshold_vec);
 z = (b1 * beta_vec).exp();
@@ -1162,9 +1138,8 @@ void glu_kernel(TensorIteratorBase& iter) {
 return float(a) * (float_one_val / (float_one_val + std::exp(- float(b))));
 },
 [float_one_vec](Vectorized<scalar_t> a, Vectorized<scalar_t> b) -> Vectorized<scalar_t> {
-Vectorized<float> a0, a1, b0, b1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
-std::tie(b0, b1) = convert_to_float<scalar_t>(b);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
+auto [b0, b1] = convert_to_float<scalar_t>(b);
 return convert_from_float<scalar_t>(a0 * (float_one_vec / (float_one_vec + b0.neg().exp())),
 a1 * (float_one_vec / (float_one_vec + b1.neg().exp())));
 });
@@ -1217,10 +1192,9 @@ void glu_backward_kernel(TensorIterator& iter) {
 return (float_one_val - float(a)) * float(a) * float(b) * float(c);
 },
 [float_one_vec](Vectorized<scalar_t> a, Vectorized<scalar_t> b, Vectorized<scalar_t> c) -> Vectorized<scalar_t> {
-Vectorized<float> a0, a1, b0, b1, c0, c1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
-std::tie(b0, b1) = convert_to_float<scalar_t>(b);
-std::tie(c0, c1) = convert_to_float<scalar_t>(c);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
+auto [b0, b1] = convert_to_float<scalar_t>(b);
+auto [c0, c1] = convert_to_float<scalar_t>(c);
 a0 = (float_one_vec - a0) * a0 * b0 * c0;
 a1 = (float_one_vec - a1) * a1 * b1 * c1;
 return convert_from_float<scalar_t>(a0, a1);
@@ -1254,8 +1228,7 @@ void silu_kernel(TensorIteratorBase& iter) {
 return float(x) / (1.0f + std::exp(-float(x)));
 },
 [kOneVec](Vectorized<scalar_t> x_vec) -> Vectorized<scalar_t> {
-Vectorized<float> x_vec0, x_vec1;
-std::tie(x_vec0, x_vec1) = convert_to_float<scalar_t>(x_vec);
+auto [x_vec0, x_vec1] = convert_to_float<scalar_t>(x_vec);
 return convert_from_float<scalar_t>(
 x_vec0 / (kOneVec + x_vec0.neg().exp()),
 x_vec1 / (kOneVec + x_vec1.neg().exp()));
@@ -1289,9 +1262,8 @@ void silu_backward_kernel(TensorIteratorBase& iter) {
 return dy * sigmoid * (1.0f + x * (1.0f - sigmoid));
 },
 [kOneVec](Vectorized<scalar_t> dy_vec, Vectorized<scalar_t> x_vec) -> Vectorized<scalar_t> {
-Vectorized<float> x_vec0, x_vec1, dy_vec0, dy_vec1;
-std::tie(x_vec0, x_vec1) = convert_to_float<scalar_t>(x_vec);
-std::tie(dy_vec0, dy_vec1) = convert_to_float<scalar_t>(dy_vec);
+auto [x_vec0, x_vec1] = convert_to_float<scalar_t>(x_vec);
+auto [dy_vec0, dy_vec1] = convert_to_float<scalar_t>(dy_vec);
 const Vectorized<float> sigmoid0 =
 kOneVec / (kOneVec + x_vec0.neg().exp());
 const Vectorized<float> sigmoid1 =
@@ -1330,8 +1302,7 @@ void mish_kernel(TensorIteratorBase& iter) {
 return static_cast<scalar_t>(float(x) * std::tanh(std::log1p(std::exp(float(x)))));
 },
 [](Vectorized<scalar_t> x_vec) -> Vectorized<scalar_t> {
-Vectorized<float> x_vec0, x_vec1;
-std::tie(x_vec0, x_vec1) = convert_to_float<scalar_t>(x_vec);
+auto [x_vec0, x_vec1] = convert_to_float<scalar_t>(x_vec);
 return convert_from_float<scalar_t>(
 x_vec0 * x_vec0.exp().log1p().tanh(),
 x_vec1 * x_vec1.exp().log1p().tanh()
@@ -1367,9 +1338,8 @@ void mish_backward_kernel(TensorIterator& iter) {
 return dy * (tanh_softplus + x * sigmoid * (1.0f - tanh_softplus * tanh_softplus));
 },
 [kOneVec](Vectorized<scalar_t> dy_vec, Vectorized<scalar_t> x_vec) -> Vectorized<scalar_t> {
-Vectorized<float> x_vec0, x_vec1, dy_vec0, dy_vec1;
-std::tie(x_vec0, x_vec1) = convert_to_float<scalar_t>(x_vec);
-std::tie(dy_vec0, dy_vec1) = convert_to_float<scalar_t>(dy_vec);
+auto [x_vec0, x_vec1] = convert_to_float<scalar_t>(x_vec);
+auto [dy_vec0, dy_vec1] = convert_to_float<scalar_t>(dy_vec);
 const Vec sigmoid0 = kOneVec / (kOneVec + x_vec0.neg().exp());
 const Vec sigmoid1 = kOneVec / (kOneVec + x_vec1.neg().exp());
 const Vec tanh_softplus0 = x_vec0.exp().log1p().tanh();
@@ -79,8 +79,7 @@ void _amp_foreach_non_finite_check_and_unscale_cpu_kernel(
 inv_scale_val == 1.f ? val : val * inv_scale_val);
 },
 [found_inf_ptr, inv_scale_ptr](Vectorized<scalar_t> val_vec) -> Vectorized<scalar_t>{
-Vectorized<opmath_t> val_vec0, val_vec1;
-std::tie(val_vec0, val_vec1) = convert_to_float<scalar_t>(val_vec);
+auto [val_vec0, val_vec1] = convert_to_float<scalar_t>(val_vec);
 if (val_vec0.has_inf_nan() || val_vec1.has_inf_nan()) {
 *found_inf_ptr = 1.f;
 }
@@ -179,8 +179,7 @@ compute_internal(
 for (; d2 < len; d2 += Vec::size()) {
 iVec index_ivec = iVec(id * input_height * input_width + ih * input_width + iw);
 Vec val_bvec = Vec::loadu(in + d2);
-fVec val_fvec0, val_fvec1;
-std::tie(val_fvec0, val_fvec1) = convert_to_float<scalar_t>(val_bvec);
+auto [val_fvec0, val_fvec1] = convert_to_float<scalar_t>(val_bvec);
 
 iVec maxindex_ivec0 = iVec::loadu(index_ptr + d2);
 iVec maxindex_ivec1 = iVec::loadu(index_ptr + d2 + iVec::size());
@@ -117,10 +117,9 @@ static void smooth_l1_backward_cpu_kernel(TensorIterator& iter, const Scalar& no
 // 1 if x >= beta
 // -1 if x <= -beta
 // x / beta if |x| < beta
-Vectorized<float> input0, input1, target0, target1, grad_output0, grad_output1;
-std::tie(input0, input1) = convert_bfloat16_float(input);
-std::tie(target0, target1) = convert_bfloat16_float(target);
-std::tie(grad_output0, grad_output1) = convert_bfloat16_float(grad_output);
+auto [input0, input1] = convert_bfloat16_float(input);
+auto [target0, target1] = convert_bfloat16_float(target);
+auto [grad_output0, grad_output1] = convert_bfloat16_float(grad_output);
 auto x = input0 - target0;
 auto pos_or_neg_1_vec = Vectorized<float>::blendv(
 neg_1_vec, pos_1_vec, x > zero_vec);
@@ -183,8 +183,7 @@ inline void norm_two_reduce_step(Vectorized<acc_t>& acc_vec, Vectorized<scalar_t
 
 template <>
 inline void norm_two_reduce_step(Vectorized<float>& acc_fvec, Vectorized<BFloat16>& data_bvec) {
-Vectorized<float> data_fvec0, data_fvec1;
-std::tie(data_fvec0, data_fvec1) = convert_bfloat16_float(data_bvec);
+auto [data_fvec0, data_fvec1] = convert_bfloat16_float(data_bvec);
 acc_fvec += data_fvec0 * data_fvec0;
 acc_fvec += data_fvec1 * data_fvec1;
 }
@@ -158,8 +158,7 @@ inline void map_acc(
 constexpr int64_t kaVecSize = aVec::size();
 for (d = 0; d < size - (size % kVecSize); d += kVecSize) {
 Vec data2_vec = Vec::loadu(input_data2 + d);
-aVec data2_avec0, data2_avec1;
-std::tie(data2_avec0, data2_avec1) = convert_to_float<scalar_t>(data2_vec);
+auto [data2_avec0, data2_avec1] = convert_to_float<scalar_t>(data2_vec);
 aVec input_vec0 = aVec::loadu(input_data + d);
 aVec input_vec1 = aVec::loadu(input_data + d + kaVecSize);
 vec_fun(input_vec0, data2_avec0).store(output_data + d);
@@ -168,8 +167,7 @@ inline void map_acc(
 if (size - d > 0) {
 int64_t tail_size = size - d;
 Vec data2_vec = Vec::loadu(input_data2 + d, tail_size);
-aVec data2_avec0, data2_avec1;
-std::tie(data2_avec0, data2_avec1) = convert_to_float<scalar_t>(data2_vec);
+auto [data2_avec0, data2_avec1] = convert_to_float<scalar_t>(data2_vec);
 if (tail_size > kaVecSize) {
 aVec input_vec0 = aVec::loadu(input_data + d);
 aVec input_vec1 = aVec::loadu(input_data + d + kaVecSize, tail_size - kaVecSize);
@@ -44,8 +44,7 @@ static void sigmoid_kernel(TensorIteratorBase& iter) {
 return static_cast<float>(1) / (static_cast<float>(1) + std::exp((-a0)));
 },
 [=](Vectorized<scalar_t> a) {
-Vectorized<float> a0, a1;
-std::tie(a0, a1) = convert_to_float<scalar_t>(a);
+auto [a0, a1] = convert_to_float<scalar_t>(a);
 a0 = (Vectorized<float>(static_cast<float>(1)) + a0.neg().exp()).reciprocal();
 a1 = (Vectorized<float>(static_cast<float>(1)) + a1.neg().exp()).reciprocal();
 return convert_from_float<scalar_t>(a0, a1);
@@ -753,8 +753,7 @@ batch_norm_cpu_contiguous_impl(Tensor& output, const Tensor& input,
 int64_t d = 0;
 for (; d < loop_size; d += bVec::size()) {
 bVec data_bvec = bVec::loadu(input_ptr + d);
-fVec data_fvec0, data_fvec1;
-std::tie(data_fvec0, data_fvec1) = convert_to_float<scalar_t>(data_bvec);
+auto [data_fvec0, data_fvec1] = convert_to_float<scalar_t>(data_bvec);
 
 fVec out_fvec0 = data_fvec0 * alpha_fvec + beta_fvec;
 fVec out_fvec1 = data_fvec1 * alpha_fvec + beta_fvec;
@@ -813,8 +812,7 @@ batch_norm_cpu_channels_last_impl(Tensor& output, const Tensor& input,
 fVec beta_fvec0 = fVec::loadu(beta_data + d);
 fVec beta_fvec1 = fVec::loadu(beta_data + d + fVec::size());
 bVec data_bvec = bVec::loadu(input_ptr + d);
-fVec data_fvec0, data_fvec1;
-std::tie(data_fvec0, data_fvec1) = convert_to_float<scalar_t>(data_bvec);
+auto [data_fvec0, data_fvec1] = convert_to_float<scalar_t>(data_bvec);
 
 fVec out_fvec0 = data_fvec0 * alpha_fvec0 + beta_fvec0;
 fVec out_fvec1 = data_fvec1 * alpha_fvec1 + beta_fvec1;
@@ -852,8 +850,7 @@ inline void batch_norm_cpu_collect_stats_contiguous_internal(
 int64_t d = 0;
 for (; d < image_size - (image_size % bVec::size()); d += bVec::size()) {
 bVec data_bvec = bVec::loadu(input_ptr + d);
-fVec data_fvec0, data_fvec1;
-std::tie(data_fvec0, data_fvec1) = convert_to_float<scalar_t>(data_bvec);
+auto [data_fvec0, data_fvec1] = convert_to_float<scalar_t>(data_bvec);
 sum_fvec += data_fvec0;
 sum_fvec += data_fvec1;
 }
@@ -874,8 +871,7 @@ inline void batch_norm_cpu_collect_stats_contiguous_internal(
 int64_t d = 0;
 for (; d < image_size - (image_size % bVec::size()); d += bVec::size()) {
 bVec data_bvec = bVec::loadu(input_ptr + d);
-fVec data_fvec0, data_fvec1;
-std::tie(data_fvec0, data_fvec1) = convert_to_float<scalar_t>(data_bvec);
+auto [data_fvec0, data_fvec1] = convert_to_float<scalar_t>(data_bvec);
 var_fvec += (data_fvec0 - mean_fvec) * (data_fvec0 - mean_fvec);
 var_fvec += (data_fvec1 - mean_fvec) * (data_fvec1 - mean_fvec);
 }
@@ -929,8 +925,7 @@ inline void batch_norm_cpu_collect_stats_channels_last_internal(
 int64_t d = 0;
 for (; d < n_channel - (n_channel % bVec::size()); d += bVec::size()) {
 bVec data_bvec = bVec::loadu(input_ptr + d);
-fVec data_fvec0, data_fvec1;
-std::tie(data_fvec0, data_fvec1) = convert_to_float<scalar_t>(data_bvec);
+auto [data_fvec0, data_fvec1] = convert_to_float<scalar_t>(data_bvec);
 fVec sum_fvec0 = fVec::loadu(buffer_ptr + d) + data_fvec0;
 fVec sum_fvec1 = fVec::loadu(buffer_ptr + d + fVec::size()) + data_fvec1;
 sum_fvec0.store(buffer_ptr + d);
@@ -960,10 +955,8 @@ inline void batch_norm_cpu_collect_stats_channels_last_internal(
 int64_t d = 0;
 for (; d < n_channel - (n_channel % bVec::size()); d += bVec::size()) {
 bVec data_bvec = bVec::loadu(input_ptr + d);
-fVec data_fvec0, data_fvec1;
-std::tie(data_fvec0, data_fvec1) = convert_to_float<scalar_t>(data_bvec);
-fVec mean_fvec0, mean_fvec1;
-std::tie(mean_fvec0, mean_fvec1) = load2f(mean_data + d);
+auto [data_fvec0, data_fvec1] = convert_to_float<scalar_t>(data_bvec);
+auto [mean_fvec0, mean_fvec1] = load2f(mean_data + d);
 fVec var_fvec0 = fVec::loadu(buffer_ptr + d);
 fVec var_fvec1 = fVec::loadu(buffer_ptr + d + fVec::size());
 var_fvec0 += (data_fvec0 - mean_fvec0) * (data_fvec0 - mean_fvec0);
@@ -1053,14 +1046,12 @@ void batch_norm_cpu_backward_contiguous_internal(Tensor& grad_input, Tensor& gra
 int64_t d = 0;
 for (; d < image_size - (image_size % bVec::size()); d += bVec::size()) {
 bVec dy_bvec = bVec::loadu(dy_ptr + d);
-fVec dy_fvec0, dy_fvec1;
-std::tie(dy_fvec0, dy_fvec1) = convert_to_float<scalar_t>(dy_bvec);
+auto [dy_fvec0, dy_fvec1] = convert_to_float<scalar_t>(dy_bvec);
 sum_fvec += dy_fvec0;
 sum_fvec += dy_fvec1;
 
 bVec x_bvec = bVec::loadu(x_ptr + d);
-fVec x_fvec0, x_fvec1;
-std::tie(x_fvec0, x_fvec1) = convert_to_float<scalar_t>(x_bvec);
+auto [x_fvec0, x_fvec1] = convert_to_float<scalar_t>(x_bvec);
 dotp_fvec += (x_fvec0 - fVec(mean)) * dy_fvec0;
 dotp_fvec += (x_fvec1 - fVec(mean)) * dy_fvec1;
 }
@@ -1188,16 +1179,14 @@ void batch_norm_cpu_backward_channels_last_internal(Tensor& grad_input, Tensor&
 int64_t d = 0;
 for(; d < n_channel - (n_channel % bVec::size()); d += bVec::size()) {
 bVec dy_bvec = bVec::loadu(dy_ptr + d);
-fVec dy_fvec0, dy_fvec1;
-std::tie(dy_fvec0, dy_fvec1) = convert_to_float<scalar_t>(dy_bvec);
+auto [dy_fvec0, dy_fvec1] = convert_to_float<scalar_t>(dy_bvec);
 fVec sum_fvec0 = dy_fvec0 + fVec::loadu(sum_ptr + d);
 fVec sum_fvec1 = dy_fvec1 + fVec::loadu(sum_ptr + d + fVec::size());
 sum_fvec0.store(sum_ptr + d);
 sum_fvec1.store(sum_ptr + d + fVec::size());
 
 bVec x_bvec = bVec::loadu(x_ptr + d);
-fVec x_fvec0, x_fvec1;
-std::tie(x_fvec0, x_fvec1) = convert_to_float<scalar_t>(x_bvec);
+auto [x_fvec0, x_fvec1] = convert_to_float<scalar_t>(x_bvec);
 fVec mean_fvec0 = fVec::loadu(mean_data + d);
 fVec mean_fvec1 = fVec::loadu(mean_data + d + fVec::size());
 fVec dotp_fvec0 = fVec::loadu(dotp_ptr + d);
@@ -1246,8 +1235,7 @@ void batch_norm_cpu_backward_channels_last_internal(Tensor& grad_input, Tensor&
 int64_t d = 0;
 for (; d < n_channel - (n_channel % bVec::size()); d += bVec::size()) {
 bVec x_bvec = bVec::loadu(x_ptr + d);
-fVec x_fvec0, x_fvec1;
-std::tie(x_fvec0, x_fvec1) = convert_to_float<scalar_t>(x_bvec);
+auto [x_fvec0, x_fvec1] = convert_to_float<scalar_t>(x_bvec);
 fVec mean_fvec0 = fVec::loadu(mean_data + d);
 fVec mean_fvec1 = fVec::loadu(mean_data + d + fVec::size());
 fVec dotp_fvec0 = fVec::loadu(dotp_data + d);
@@ -1259,8 +1247,7 @@ void batch_norm_cpu_backward_channels_last_internal(Tensor& grad_input, Tensor&
 fVec dx_fvec0 = (x_fvec0 - mean_fvec0) * k_fvec0;
 fVec dx_fvec1 = (x_fvec1 - mean_fvec1) * k_fvec1;
 bVec dy_bvec = bVec::loadu(dy_ptr + d);
-fVec dy_fvec0, dy_fvec1;
-std::tie(dy_fvec0, dy_fvec1) = convert_to_float<scalar_t>(dy_bvec);
+auto [dy_fvec0, dy_fvec1] = convert_to_float<scalar_t>(dy_bvec);
 fVec grad_mean_fvec0 = fVec::loadu(sum_data + d) / fVec(N);
 fVec grad_mean_fvec1 = fVec::loadu(sum_data + d + fVec::size()) / fVec(N);
 fVec w_fvec0 = fVec::loadu(weight_data + d);
@@ -1287,8 +1274,7 @@ void batch_norm_cpu_backward_channels_last_internal(Tensor& grad_input, Tensor&
 int64_t d = 0;
 for (; d < n_channel - (n_channel % bVec::size()); d += bVec::size()) {
 bVec dy_bvec = bVec::loadu(dy_ptr + d);
-fVec dy_fvec0, dy_fvec1;
-std::tie(dy_fvec0, dy_fvec1) = convert_to_float<scalar_t>(dy_bvec);
+auto [dy_fvec0, dy_fvec1] = convert_to_float<scalar_t>(dy_bvec);
 fVec invstd_fvec0 = fVec::loadu(invstd_data + d);
 fVec invstd_fvec1 = fVec::loadu(invstd_data + d + fVec::size());
 fVec w_fvec0 = fVec::loadu(weight_data + d);
@@ -93,8 +93,7 @@ UpdateMomentsVec(
 fVec m2_fvec0(0), m2_fvec1(0);
 for (const auto j : c10::irange(m0)) {
 const Vec x_bvec = Vec::loadu(X_ptr + j * Vec::size());
-fVec x_fvec0, x_fvec1;
-std::tie(x_fvec0, x_fvec1) = convert_to_float<T>(x_bvec);
+auto [x_fvec0, x_fvec1] = convert_to_float<T>(x_bvec);
 const fVec delta_fvec0 = x_fvec0 - m1_fvec0;
 const fVec delta_fvec1 = x_fvec1 - m1_fvec1;
 m1_fvec0 += delta_fvec0 * c_vecs[j];
@@ -61,8 +61,7 @@ struct Vec2 {
 Vec2(Vectorized<float> v0, Vectorized<float> v1) : val0(v0), val1(v1) {}
 Vec2(float v) : val0(v), val1(v) {}
 static Vec2 loadu(const BFloat16* ptr) {
-Vectorized<float> v0, v1;
-std::tie(v0, v1) = convert_bfloat16_float(Vectorized<BFloat16>::loadu(ptr));
+auto [v0, v1] = convert_bfloat16_float(Vectorized<BFloat16>::loadu(ptr));
 return {v0, v1};
 }
 static Vec2 loadu(const float* ptr) {
@@ -50,9 +50,7 @@ std::tuple<Tensor, Tensor> sort_quantized_cpu_stable(
 c10::optional<bool> stable,
 int64_t dim,
 bool descending) {
-Tensor sort_int;
-Tensor sort_indicies;
-std::tie(sort_int, sort_indicies) =
+auto [sort_int, sort_indicies] =
 at::sort(self.int_repr(), stable, dim, descending);
 return std::forward_as_tuple(
 at::_make_per_tensor_quantized_tensor(
@@ -187,10 +187,9 @@ Tensor q_avg_pool2d(
 bool count_include_pad,
 c10::optional<int64_t> divisor_override) {
 // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-int kW, kH, dW, dH, padW, padH;
-std::tie(kW, kH) = get_kernel(kernel_size);
-std::tie(dW, dH) = get_stride(stride, kW, kH);
-std::tie(padW, padH) = get_padding(padding);
+auto [kW, kH] = get_kernel(kernel_size);
+auto [dW, dH] = get_stride(stride, kW, kH);
+auto [padW, padH] = get_padding(padding);
 
 const int64_t nbatch = input.ndimension() == 4 ? input.size(-4) : 1;
 const int64_t nInputPlane = input.size(-3);
@@ -267,12 +266,9 @@ Tensor qnnpack_avg_pool2d(
 bool ceil_mode,
 bool count_include_pad,
 c10::optional<int64_t> divisor_override) {
-Tensor output;
-// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-int kW, kH, dW, dH, padW, padH;
-std::tie(kW, kH) = get_kernel(kernel_size);
-std::tie(dW, dH) = get_stride(stride, kW, kH);
-std::tie(padW, padH) = get_padding(padding);
+auto [kW, kH] = get_kernel(kernel_size);
+auto [dW, dH] = get_stride(stride, kW, kH);
+auto [padW, padH] = get_padding(padding);
 TORCH_CHECK(
 input.ndimension() == 4,
 "qnnpack_avg_pool2d(): Expected input to be 4-dimensional: got ",
@@ -303,7 +299,7 @@ Tensor qnnpack_avg_pool2d(
 oH > 0 && oW > 0,
 "qnnpack_avg_pool2d(): the resulting output Tensor size should be >= 0");
 // NHWC output
-output = at::_empty_affine_quantized(
+auto output = at::_empty_affine_quantized(
 output_shape,
 at::device(kCPU).dtype(kQUInt8),
 scale,
@@ -101,11 +101,9 @@ Tensor q_avg_pool3d(
 bool ceil_mode,
 bool count_include_pad,
 c10::optional<int64_t> divisor_override) {
-// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-int kD, kW, kH, dD, dW, dH, padD, padW, padH;
-std::tie(kW, kH, kD) = get_kernel(kernel_size);
-std::tie(dW, dH, dD) = get_stride(stride, kW, kH, kD);
-std::tie(padW, padH, padD) = get_padding(padding);
+auto [kW, kH, kD] = get_kernel(kernel_size);
+auto [dW, dH, dD] = get_stride(stride, kW, kH, kD);
+auto [padW, padH, padD] = get_padding(padding);
 
 const int64_t nbatch = input.ndimension() == 5 ? input.size(-5) : 1;
 const int64_t nInputPlane = input.size(-4);
@@ -236,9 +236,7 @@ ConvParamsSerializationTypeV2 serialize_conv(
 // clone to retain ownership of the data
 .clone();
 
-at::Tensor weight;
-c10::optional<at::Tensor> bias;
-std::tie(weight, bias) = params->unpack();
+auto [weight, bias] = params->unpack();
 
 non_optional.emplace_back(std::move(params_tensor));
 non_optional.emplace_back(std::move(weight));
@@ -267,9 +265,7 @@ ConvParamsSerializationTypeV3 serialize_conv(
 config_vals.push_back(params->groups());
 config_vals.push_back(params->transpose());
 
-at::Tensor weight;
-c10::optional<at::Tensor> bias;
-std::tie(weight, bias) = params->unpack();
+auto [weight, bias] = params->unpack();
 
 std::vector<c10::optional<at::Tensor>> tensors;
 tensors.emplace_back();
@@ -287,12 +283,7 @@ ConvParamsSerializationTypeV3 serialize_conv(
 template <uint32_t kSpatialDim>
 c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> deserialize_conv(
 ConvParamsSerializationTypeV3 state) {
-
-int64_t version;
-std::vector<int64_t> config_vals;
-std::vector<c10::optional<at::Tensor>> tensors;
-
-std::tie(version, config_vals, tensors) = state;
+auto [version, config_vals, tensors] = state;
 TORCH_INTERNAL_ASSERT(version == 3, "Unexpected serialized qconv version: ", version);
 
 TORCH_CHECK(tensors.size() == 3, "Wrong number of tensors", tensors.size());
@@ -410,16 +410,10 @@ register_conv_params() {
 return deserialize_conv<kSpatialDim>(state);
 })
 .def("weight", [](const c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>>& self) {
-at::Tensor weight;
-c10::optional<at::Tensor> bias;
-std::tie(weight, bias) = self->unpack();
-return weight;
+return std::get<0>(self->unpack());
 })
 .def("bias", [](const c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>>& self) {
-at::Tensor weight;
-c10::optional<at::Tensor> bias;
-std::tie(weight, bias) = self->unpack();
-return bias;
+return std::get<1>(self->unpack());
 })
 .def("unpack", &ConvPackedParamsBase<kSpatialDim>::unpack)
 .def("stride", &ConvPackedParamsBase<kSpatialDim>::stride)
@@ -446,10 +440,7 @@ TORCH_API int register_linear_params() {
 .def_pickle(
 [](const c10::intrusive_ptr<LinearPackedParamsBase>& params)
 -> SerializationType { // __getstate__
-at::Tensor weight;
-c10::optional<at::Tensor> bias;
-std::tie(weight, bias) = params->unpack();
-return std::make_tuple(std::move(weight), std::move(bias));
+return params->unpack();
 },
 [](SerializationType state)
 -> c10::intrusive_ptr<
@@ -501,10 +492,7 @@ TORCH_API int register_linear_params() {
 TORCH_CHECK(false, "Unknown qengine");
 })
 .def("bias", [](const c10::intrusive_ptr<LinearPackedParamsBase>& self) {
-at::Tensor weight;
-c10::optional<at::Tensor> bias;
-std::tie(weight, bias) = self->unpack();
-return bias;
+return std::get<1>(self->unpack());
 })
 .def("unpack", &LinearPackedParamsBase::unpack);
 // (1) we can't (easily) return the static initializer itself because it can have a different type because of selective build
@@ -548,12 +536,7 @@ int register_embedding_params() {
 [](EmbeddingParamsSerializationType state)
 -> c10::intrusive_ptr<EmbeddingPackedParamsBase> { // __setstate__ call
 
-std::vector<at::Tensor> tensors;
-std::vector<double> doubles;
-std::vector<int64_t> longs;
-// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-int64_t version;
-std::tie(version, tensors, doubles, longs) = std::move(state);
+auto [version, tensors, doubles, longs] = std::move(state);
 
 TORCH_INTERNAL_ASSERT(tensors.size() == 1, "EmbeddingPackedParams: Expected weight tensor to be serialized");
 TORCH_INTERNAL_ASSERT(longs.size() == 1, "EmbeddingPackedParams: Expected bit_rate to be serialized");
@@ -283,9 +283,7 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeightsQnnp<
 auto kernel_dim = kSpatialDim == 2
 ? std::vector<int64_t>{kernel_h, kernel_w}
 : std::vector<int64_t>{kernel_d, kernel_h, kernel_w};
-std::vector<uint8_t> w_zero_points;
-at::Tensor w_scales;
-std::tie(w_zero_points, w_scales) =
+auto [w_zero_points, w_scales] =
 make_zero_points_and_scales_tensor(weight_contig, transpose, groups);
 // We set the pre-packed conv weights to nullptr below as we call pre-pack
 // during the first invocation of operator run. Refer to qconv.cpp for more
@@ -436,8 +436,7 @@ Tensor _qembeddingbag_nbit_prepack_helper(
 // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
 float Xmin, Xmax;
 if (optimized_qparams) {
-at::Tensor xmax_tensor, xmin_tensor;
-std::tie(xmax_tensor, xmin_tensor) = at::choose_qparams_optimized(
+auto [xmax_tensor, xmin_tensor] = at::choose_qparams_optimized(
 float_weight[row], embedding_cols, nbins, ratio, bit_width);
 TORCH_CHECK(
 xmax_tensor.numel() == 1 && xmin_tensor.numel() == 1,
@@ -520,8 +520,7 @@ at::Tensor PackedLinearWeightsOnednn::apply_dynamic_impl(
 /*len=*/input.numel());
 #else
 if (input_contig.numel() > 0) {
-Tensor t_min, t_max;
-std::tie(t_min, t_max) = at::aminmax(input_contig);
+auto [t_min, t_max] = at::aminmax(input_contig);
 x_max = t_max.item<float>();
 x_min = t_min.item<float>();
 }
@@ -157,9 +157,7 @@ c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeightsQnnp::prepack(
 " instead");
 
 at::Tensor weight_contig = weight.contiguous();
-std::vector<uint8_t> w_zero_points;
-at::Tensor w_scales;
-std::tie(w_zero_points, w_scales) =
+auto [w_zero_points, w_scales] =
 make_zero_points_and_scales_tensor(weight_contig);
 
 at::native::initQNNPACK();
@@ -181,9 +181,7 @@ class QConvTranspose final {
 IValue
 unpack_quantized_prepacked_sizes_conv2d(const IValue& ivalue) {
 auto params = ivalue.toCustomClass<ConvPackedParamsBase<2>>();
-at::Tensor weight;
-c10::optional<at::Tensor> bias;
-std::tie(weight, bias) = params->unpack();
+auto [weight, bias] = params->unpack();
 at::OptionalIntArrayRef bias_sizes = c10::nullopt;
 if (bias && bias->defined()) {
 bias_sizes = bias->sizes();
@@ -174,8 +174,7 @@ Tensor& _compressed_row_strided_mm_out(const Tensor& compressed, const Tensor& s
 values.unsqueeze_(-1).unsqueeze_(-1);
 }
 
-Tensor compressed_indices, plain_indices;
-std::tie(compressed_indices, plain_indices) = at::sparse_csr::getCompressedPlainIndices(compressed);
+auto [compressed_indices, plain_indices] = at::sparse_csr::getCompressedPlainIndices(compressed);
 
 // Select block rows of the strided input that intersect with the block columns of the sparse input.
 auto strided_tiled_selected_rows = strided_tiled.index_select(-4, plain_indices);
@@ -878,9 +878,7 @@ Tensor select_sparse_csr_worker(const Tensor& self, int64_t dim, int64_t index)
 new_sizes.erase(new_sizes.begin() + dim);
 auto options = self.options();
 
-Tensor plain_indices;
-Tensor compressed_indices;
-std::tie(compressed_indices, plain_indices) =
+auto [compressed_indices, plain_indices] =
 AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS(
 self.layout(),
 "select",
@@ -222,8 +222,7 @@ Tensor intersection_binary_op_with_wrapped_scalar(const Tensor& sparse, const Te
 // NOTE: intersection_binary_op_with_wrapped_scalar assumes scalar.numel() == 1.
 const auto result_values = op(sparse.values(), scalar.squeeze()).to(at::result_type(sparse, scalar));
 const auto result_sizes = infer_size(sparse.sizes(), scalar.sizes());
-Tensor compressed_indices, plain_indices;
-std::tie(compressed_indices, plain_indices) = getCompressedPlainIndices(sparse);
+auto [compressed_indices, plain_indices] = getCompressedPlainIndices(sparse);
 return at::_sparse_compressed_tensor_unsafe(
 compressed_indices.clone(),
 plain_indices.clone(),
@@ -356,8 +355,7 @@ Tensor sparse_mask_sparse_compressed(
 }
 
 if (self.layout() == kStrided) {
-Tensor compressed_indices, plain_indices;
-std::tie(compressed_indices, plain_indices) = at::sparse_csr::getCompressedPlainIndices(mask);
+auto [compressed_indices, plain_indices] = at::sparse_csr::getCompressedPlainIndices(mask);
 auto mask_values = mask.values();
 auto dense_mask = at::_sparse_compressed_tensor_unsafe(
 compressed_indices,
@@ -1066,8 +1064,6 @@ Tensor reduce_sparse_csr_dim0_cpu_template(const Tensor& sparse, ReductionOp rop
 Tensor col_indices = sparse.col_indices();
 Tensor values = sparse.values();
 auto numel = values.numel();
-Tensor new_col_indices;
-Tensor columns_map;
 
 /*
 Calling at::_unique constitutes the main bottleneck of this
@@ -1075,7 +1071,7 @@ Tensor reduce_sparse_csr_dim0_cpu_template(const Tensor& sparse, ReductionOp rop
 invariant:
 csr.sum(dim=0) == csr.transpose(0, 1).sum(dim=1)
 */
-std::tie(new_col_indices, columns_map) = at::_unique(col_indices, true, true);
+auto [new_col_indices, columns_map] = at::_unique(col_indices, true, true);
 auto nnz = new_col_indices.numel();
 
 Tensor new_crow_indices = at::empty({2}, col_indices.options());
@@ -629,9 +629,7 @@ SparseTensor _coalesce_sparse_cpu(const SparseTensor& self) {
 Tensor newValues = at::empty(values.sizes(), values.options());
 alias_into_sparse(dst, newIndices, newValues);
 
-Tensor indicesBuffer;
-Tensor indicesPermutation;
-std::tie(indicesBuffer, indicesPermutation) = indices_scalar.sort(0);
+auto [indicesBuffer, indicesPermutation] = indices_scalar.sort(0);
 // NB: The accessor accesses here rely on self._nnz() > 0 (tested earlier in
 // this function)
 auto newIndicesAccessor = newIndices.accessor<int64_t, 2>();
@@ -729,11 +727,7 @@ static std::tuple<Tensor, Tensor, OptTensor> sparse_mask_like_prepare_sparse_inp
 return res;
 };
 
-Tensor lhs;
-OptTensor lhs_hash_opt;
-bool lhs_is_movable;
-
-std::tie(lhs, lhs_hash_opt, lhs_is_movable) = [&]() -> auto {
+auto [lhs, lhs_hash_opt, lhs_is_movable] = [&]() -> auto {
 if (t.is_coalesced()) {
 return std::make_tuple(t, static_cast<OptTensor>(c10::nullopt), false);
 } else {
@@ -782,9 +776,7 @@ SparseTensor sparse_mask(const Tensor& t, const SparseTensor& mask) {
 }
 
 auto res = at::empty({0}, t.options());
-Tensor lhs, rhs;
-OptTensor lhs_hash_opt;
-std::tie(lhs, rhs, lhs_hash_opt) = sparse_mask_like_prepare_sparse_inputs("sparse_mask", t, mask);
+auto [lhs, rhs, lhs_hash_opt] = sparse_mask_like_prepare_sparse_inputs("sparse_mask", t, mask);
 sparse_mask_intersection_out_stub(res.device().type(), res, lhs, rhs, lhs_hash_opt);
 return res._coalesced_(mask.is_coalesced());
 }
@@ -815,9 +807,7 @@ Tensor sparse_mask_projection(const Tensor& t, const Tensor& mask, bool accumula
 }
 
 auto res = at::empty({0}, t.options());
-Tensor lhs, rhs;
-OptTensor lhs_hash_opt;
-std::tie(lhs, rhs, lhs_hash_opt) = sparse_mask_like_prepare_sparse_inputs("_sparse_mask_projection", mask, t);
+auto [lhs, rhs, lhs_hash_opt] = sparse_mask_like_prepare_sparse_inputs("_sparse_mask_projection", mask, t);
 sparse_mask_projection_out_stub(res.device().type(), res, lhs, rhs, lhs_hash_opt, accumulate_matches);
 return res._coalesced_(t.is_coalesced());
 }
@@ -540,8 +540,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> _scaled_dot_product_flash_attenti
 Tensor grad_out_t = grad_out_.transpose(1,2);
 Tensor out_t = out.transpose(1,2);
 
-Tensor grad_q, grad_k, grad_v;
-std::tie(grad_q, grad_k, grad_v) = at::_flash_attention_backward(
+auto [grad_q, grad_k, grad_v] = at::_flash_attention_backward(
 grad_out_t,
 q_t,
 k_t,
@@ -1500,14 +1500,11 @@ TEST(TestAutogradNotImplementedFallback, RetTupleNonTensor) {
 auto opHandle = c10::Dispatcher::singleton().findSchemaOrThrow(
 "_test::ret_tuple_non_tensor", "");
 auto op = [&](const torch::Tensor& _1, const torch::Tensor& _2) {
-torch::Tensor out0;
-torch::Tensor out1;
-int64_t out2;
 auto out = callOpUnboxed<
 std::tuple<torch::Tensor, torch::Tensor, int64_t>,
 const torch::Tensor&,
 const torch::Tensor&>(opHandle, _1, _2);
-std::tie(out0, out1, out2) = std::move(out);
+auto [out0, out1, out2] = std::move(out);
 return out0;
 };
 