Enable Detectron model inference for CPU and MKL-DNN paths (#10157)

Summary:
1. Support ops needed for inference of Faster-RCNN/Mask-RCNN in Detectron, mostly via direct fallbacks.
2. Use CPU device to hold 0-dim tensors and integer tensors in both fallback op and blob feeder, needed by Detectron models.
3. Ignore 0-dim tensor in MKL-DNN concat operator.
4. Generate dynamic library of Detectron module for CPU device.

This PR obsoletes #9164.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10157

Differential Revision: D9276837

Pulled By: yinghai

fbshipit-source-id: dc364932ae4a2e7fcefdee70b5fce3c0cee91b6f
This commit is contained in:
jgong5
2018-08-29 14:56:55 -07:00
committed by Facebook Github Bot
parent 89834dfe64
commit c755616e00
9 changed files with 299 additions and 93 deletions

View File

@ -11,4 +11,8 @@ if (USE_CUDA)
target_link_libraries(caffe2_detectron_ops_gpu caffe2_gpu)
install(TARGETS caffe2_detectron_ops_gpu DESTINATION lib)
elseif(NOT IOS_PLATFORM)
add_library(caffe2_detectron_ops SHARED ${Detectron_CPU_SRCS})
target_link_libraries(caffe2_detectron_ops caffe2)
install(TARGETS caffe2_detectron_ops DESTINATION lib)
endif()

View File

@ -15,9 +15,19 @@
*/
#include "batch_permutation_op.h"
#ifdef CAFFE2_USE_IDEEP
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif
namespace caffe2 {
#ifdef CAFFE2_USE_IDEEP
REGISTER_IDEEP_OPERATOR(
BatchPermutation,
IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
#endif
REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
BatchPermutationGradient,

View File

@ -15,8 +15,17 @@
*/
#include "upsample_nearest_op.h"
#ifdef CAFFE2_USE_IDEEP
#include "caffe2/ideep/operators/operator_fallback_ideep.h"
#include "caffe2/ideep/utils/ideep_operator.h"
#endif
namespace caffe2 {
#ifdef CAFFE2_USE_IDEEP
REGISTER_IDEEP_OPERATOR(
UpsampleNearest,
IDEEPFallbackOp<UpsampleNearestOp<float, CPUContext>>);
#endif
REGISTER_CPU_OPERATOR(UpsampleNearest, UpsampleNearestOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(

View File

@ -35,8 +35,50 @@ class UpsampleNearestOp final : public Operator<Context> {
USE_OPERATOR_CONTEXT_FUNCTIONS;
bool RunOnDevice() override {
  // CPU nearest-neighbor upsampling: the last two dimensions of the input
  // are enlarged by the integer factor scale_, and each output element is a
  // copy of the nearest input element.
  //
  // BUG FIX: the stale "No CPU implementation for now" stub
  // (CAFFE_NOT_IMPLEMENTED) was left at the top of this body, which would
  // abort every call and make the implementation below unreachable; it has
  // been removed.

  // Map a flat index into the output tensor (whose trailing dims are
  // d1 x d2 x d3) to the flat index of its source element in the input, by
  // floor-dividing the two innermost coordinates by scale_factor.
  auto translate_idx = [](int ii, int d1, int d2, int d3, int scale_factor) {
    int x, y, z, w;
    w = ii % d3;
    ii = ii / d3;
    z = ii % d2;
    ii = ii / d2;
    y = ii % d1;
    ii = ii / d1;
    x = ii;
    // Nearest-neighbor: collapse output coordinates back onto the input grid.
    w = w / scale_factor;
    z = z / scale_factor;
    d2 /= scale_factor;
    d3 /= scale_factor;
    return (((x * d1 + y) * d2) + z) * d3 + w;
  };

  auto& X = Input(0);
  auto* Y = Output(0);
  // Output shape: identical to the input except the last two dims are scaled.
  // NOTE(review): assumes X.ndim() >= 2 — confirm callers guarantee this.
  auto out_shape = X.dims();
  out_shape[X.ndim() - 1] *= scale_;
  out_shape[X.ndim() - 2] *= scale_;
  Y->Resize(out_shape);

  // d1/d2/d3 are the trailing three output dims. For a 4-D (e.g. NCHW)
  // input the leading batch dim is skipped here; translate_idx still folds
  // it in correctly because x absorbs everything above d1.
  int d1;
  int d2;
  int d3;
  if (X.ndim() == 3) {
    d1 = Y->dim32(0);
    d2 = Y->dim32(1);
    d3 = Y->dim32(2);
  } else {
    d1 = Y->dim32(1);
    d2 = Y->dim32(2);
    d3 = Y->dim32(3);
  }

  const T* input_data = X.template data<T>();
  T* output_data = Y->template mutable_data<T>();
  // Gather: each output element reads exactly one input element.
  for (int ii = 0; ii < Y->size(); ii++) {
    output_data[ii] = input_data[translate_idx(ii, d1, d2, d3, scale_)];
  }
  return true;
}
protected: