mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Summary: As GoogleTest `TEST` macro is non-compliant with it as well as `DEFINE_DISPATCH` All changes but the ones to `.clang-tidy` are generated using following script: ``` for i in `find . -type f -iname "*.c*" -or -iname "*.h"|xargs grep cppcoreguidelines-avoid-non-const-global-variables|cut -f1 -d:|sort|uniq`; do sed -i "/\/\/ NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)/d" $i; done ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/62008 Reviewed By: driazati, r-barnes Differential Revision: D29838584 Pulled By: malfet fbshipit-source-id: 1b2f8602c945bd4ce50a9bfdd204755556e31d13
212 lines
7.5 KiB
C++
212 lines
7.5 KiB
C++
#include "bbox_transform_op.h"
|
|
#include "caffe2/operators/generate_proposals_op_util_boxes.h"
|
|
|
|
namespace caffe2 {
|
|
namespace {
|
|
|
|
REGISTER_CPU_OPERATOR(BBoxTransform, BBoxTransformOp<float, CPUContext>);
|
|
|
|
// Input: box, delta Output: box
|
|
OPERATOR_SCHEMA(BBoxTransform)
|
|
.NumInputs(3)
|
|
.NumOutputs(1, 2)
|
|
.SetDoc(R"DOC(
|
|
Transform proposal bounding boxes to target bounding box using bounding box
|
|
regression deltas.
|
|
)DOC")
|
|
.Arg("weights", "vector<float> weights [wx, wy, ww, wh] for the deltas")
|
|
.Arg(
|
|
"apply_scale",
|
|
"bool (default true), transform the boxes to the scaled image space"
|
|
" after applying the bbox deltas."
|
|
"Set to false to match the detectron code, set to true for keypoint"
|
|
" models and for backward compatibility")
|
|
.Arg(
|
|
"rotated",
|
|
"bool (default false). If true, then boxes (rois and deltas) include "
|
|
"angle info to handle rotation. The format will be "
|
|
"[ctr_x, ctr_y, width, height, angle (in degrees)].")
|
|
.Arg(
|
|
"angle_bound_on",
|
|
"bool (default true). If set, for rotated boxes, angle is "
|
|
"normalized to be within [angle_bound_lo, angle_bound_hi].")
|
|
.Arg(
|
|
"angle_bound_lo",
|
|
"int (default -90 degrees). If set, for rotated boxes, angle is "
|
|
"normalized to be within [angle_bound_lo, angle_bound_hi].")
|
|
.Arg(
|
|
"angle_bound_hi",
|
|
"int (default 90 degrees). If set, for rotated boxes, angle is "
|
|
"normalized to be within [angle_bound_lo, angle_bound_hi].")
|
|
.Arg(
|
|
"clip_angle_thresh",
|
|
"float (default 1.0 degrees). For RRPN, clip almost horizontal boxes "
|
|
"within this threshold of tolerance for backward compatibility. "
|
|
"Set to negative value for no clipping.")
|
|
.Input(
|
|
0,
|
|
"rois",
|
|
"Bounding box proposals in pixel coordinates, "
|
|
"Size (M, 4), format [x1, y1, x2, y2], or"
|
|
"Size (M, 5), format [batch_index, x1, y1, x2, y2]. "
|
|
"If proposals from multiple images in a batch are present, they "
|
|
"should be grouped sequentially and in incremental order."
|
|
"For rotated boxes, this would have an additional angle (in degrees) "
|
|
"in the format [<optionaal_batch_id>, ctr_x, ctr_y, w, h, angle].")
|
|
.Input(
|
|
1,
|
|
"deltas",
|
|
"bounding box translations and scales,"
|
|
"size (M, 4*K), format [dx, dy, dw, dh], K = # classes. "
|
|
"For rotated boxes, size (M, 5*K, format [dx, dy, dw, dh, da].")
|
|
.Input(
|
|
2,
|
|
"im_info",
|
|
"Image dimensions, size (batch_size, 3), "
|
|
"format [img_height, img_width, img_scale]")
|
|
.Output(
|
|
0,
|
|
"box_out",
|
|
"Pixel coordinates of the transformed bounding boxes,"
|
|
"Size (M, 4*K), format [x1, y1, x2, y2]. "
|
|
"For rotated boxes, size (M, 5*K), "
|
|
"format [ctr_x, ctr_y, w, h, angle].")
|
|
.Output(
|
|
1,
|
|
"roi_batch_splits",
|
|
"Tensor of shape (batch_size) with each element denoting the number "
|
|
"of RoIs belonging to the corresponding image in batch");
|
|
|
|
SHOULD_NOT_DO_GRADIENT(BBoxTransform);
|
|
} // namespace
|
|
|
|
template <>
|
|
bool BBoxTransformOp<float, CPUContext>::RunOnDevice() {
|
|
const auto& roi_in = Input(0);
|
|
const auto& delta_in = Input(1);
|
|
const auto& iminfo_in = Input(2);
|
|
|
|
const int box_dim = rotated_ ? 5 : 4;
|
|
const int N = roi_in.dim32(0);
|
|
CAFFE_ENFORCE_EQ(roi_in.dim(), 2);
|
|
CAFFE_ENFORCE(roi_in.dim32(1) == box_dim || roi_in.dim32(1) == box_dim + 1);
|
|
|
|
CAFFE_ENFORCE_EQ(delta_in.dim(), 2);
|
|
CAFFE_ENFORCE_EQ(delta_in.dim32(0), N);
|
|
CAFFE_ENFORCE_EQ(delta_in.dim32(1) % box_dim, 0);
|
|
const int num_classes = delta_in.dim32(1) / box_dim;
|
|
|
|
CAFFE_ENFORCE_EQ(iminfo_in.dim(), 2);
|
|
CAFFE_ENFORCE_EQ(iminfo_in.dim32(1), 3);
|
|
const int batch_size = iminfo_in.dim32(0);
|
|
|
|
DCHECK_EQ(weights_.size(), 4);
|
|
|
|
Eigen::Map<const ERArrXXf> boxes0(
|
|
roi_in.data<float>(), roi_in.dim32(0), roi_in.dim32(1));
|
|
Eigen::Map<const ERArrXXf> deltas0(
|
|
delta_in.data<float>(), delta_in.dim32(0), delta_in.dim32(1));
|
|
|
|
// Count the number of RoIs per batch
|
|
vector<int> num_rois_per_batch(batch_size, 0);
|
|
if (roi_in.dim32(1) == box_dim) {
|
|
CAFFE_ENFORCE_EQ(batch_size, 1);
|
|
num_rois_per_batch[0] = N;
|
|
} else {
|
|
const auto& roi_batch_ids = boxes0.col(0);
|
|
for (int i = 0; i < roi_batch_ids.size(); ++i) {
|
|
const int roi_batch_id = roi_batch_ids(i);
|
|
CAFFE_ENFORCE_LT(roi_batch_id, batch_size);
|
|
num_rois_per_batch[roi_batch_id]++;
|
|
}
|
|
}
|
|
|
|
CAFFE_ENFORCE_EQ(iminfo_in.sizes(), (at::IntArrayRef{batch_size, 3}));
|
|
Eigen::Map<const ERArrXXf> iminfo(
|
|
iminfo_in.data<float>(), iminfo_in.size(0), iminfo_in.size(1));
|
|
|
|
auto* box_out = Output(0, delta_in.sizes(), at::dtype<float>());
|
|
Eigen::Map<ERArrXXf> new_boxes(
|
|
box_out->template mutable_data<float>(),
|
|
box_out->dim32(0),
|
|
box_out->dim32(1));
|
|
|
|
// We assume roi_in and delta_in over multiple batches are grouped
|
|
// together in increasing order as generated by GenerateProposalsOp
|
|
int offset = 0;
|
|
for (int i = 0; i < batch_size; ++i) {
|
|
const int num_rois = num_rois_per_batch[i];
|
|
const auto& cur_iminfo = iminfo.row(i);
|
|
const float scale_before = cur_iminfo(2);
|
|
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
|
|
const float scale_after = apply_scale_ ? cur_iminfo(2) : 1.0;
|
|
// NOLINTNEXTLINE(bugprone-incorrect-roundings,cppcoreguidelines-avoid-magic-numbers)
|
|
int img_h = int(cur_iminfo(0) / scale_before + 0.5);
|
|
// NOLINTNEXTLINE(bugprone-incorrect-roundings,cppcoreguidelines-avoid-magic-numbers)
|
|
int img_w = int(cur_iminfo(1) / scale_before + 0.5);
|
|
|
|
EArrXXf cur_boxes =
|
|
boxes0.rightCols(box_dim).block(offset, 0, num_rois, box_dim);
|
|
// Do not apply scale for angle in rotated boxes
|
|
cur_boxes.leftCols(4) /= scale_before;
|
|
for (int k = 0; k < num_classes; k++) {
|
|
const auto& cur_deltas =
|
|
deltas0.block(offset, k * box_dim, num_rois, box_dim);
|
|
const auto& trans_boxes = utils::bbox_transform(
|
|
cur_boxes,
|
|
cur_deltas,
|
|
weights_,
|
|
utils::BBOX_XFORM_CLIP_DEFAULT,
|
|
legacy_plus_one_,
|
|
angle_bound_on_,
|
|
angle_bound_lo_,
|
|
angle_bound_hi_);
|
|
EArrXXf clip_boxes = utils::clip_boxes(
|
|
trans_boxes, img_h, img_w, clip_angle_thresh_, legacy_plus_one_);
|
|
// Do not apply scale for angle in rotated boxes
|
|
clip_boxes.leftCols(4) *= scale_after;
|
|
new_boxes.block(offset, k * box_dim, num_rois, box_dim) = clip_boxes;
|
|
}
|
|
|
|
offset += num_rois;
|
|
}
|
|
|
|
if (OutputSize() > 1) {
|
|
auto* roi_batch_splits = Output(1, {batch_size}, at::dtype<float>());
|
|
Eigen::Map<EArrXf> roi_batch_splits_map(
|
|
roi_batch_splits->template mutable_data<float>(), batch_size);
|
|
roi_batch_splits_map =
|
|
Eigen::Map<const EArrXi>(num_rois_per_batch.data(), batch_size)
|
|
.cast<float>();
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
} // namespace caffe2
|
|
|
|
using BBoxTransformOpFloatCPU =
|
|
caffe2::BBoxTransformOp<float, caffe2::CPUContext>;
|
|
|
|
// clang-format off
|
|
C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
|
|
BBoxTransform,
|
|
"_caffe2::BBoxTransform("
|
|
"Tensor rois, "
|
|
"Tensor deltas, "
|
|
"Tensor im_info, "
|
|
"float[] weights, "
|
|
"bool apply_scale, "
|
|
"bool rotated, "
|
|
"bool angle_bound_on, "
|
|
"int angle_bound_lo, "
|
|
"int angle_bound_hi, "
|
|
"float clip_angle_thresh, "
|
|
"bool legacy_plus_one"
|
|
") -> ("
|
|
"Tensor output_0, "
|
|
"Tensor output_1"
|
|
")",
|
|
BBoxTransformOpFloatCPU);
|
|
// clang-format on
|