Enable Detectron model inference for CPU and MKL-DNN paths (#10157)
Summary:
1. Support the ops needed for Faster-RCNN/Mask-RCNN inference in Detectron, mostly as direct fallbacks to the existing CPU implementations.
2. Use the CPU device to hold 0-dim tensors and integer tensors in both the fallback op and the blob feeder, as required by Detectron models.
3. Ignore 0-dim tensors in the MKL-DNN concat operator.
4. Generate a dynamic library of the Detectron module for the CPU device.

This PR obsoletes #9164.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/10157
Differential Revision: D9276837
Pulled By: yinghai
fbshipit-source-id: dc364932ae4a2e7fcefdee70b5fce3c0cee91b6f
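The "direct fallback" in point 1 means the IDEEP build reuses the existing CPU kernels instead of adding native MKL-DNN implementations: inputs are copied to CPU, the unchanged CPU operator runs, and the outputs are copied back to the device. A minimal conceptual sketch of that pattern follows; the stub Tensor type and copy helpers are stand-ins, and the real logic is IDEEPFallbackOp in caffe2/ideep/operators/operator_fallback_ideep.h.

// Conceptual sketch only: stub types stand in for ideep tensors and their
// CPU conversions; this is not the actual IDEEPFallbackOp implementation.
#include <vector>

struct Tensor {
  std::vector<float> data; // stand-in for a device buffer
};

Tensor ToCPU(const Tensor& t) { return t; }   // device -> CPU copy (stub)
Tensor ToIDEEP(const Tensor& t) { return t; } // CPU -> device copy (stub)

template <typename CPUOp>
struct FallbackOp {
  bool Run(const std::vector<Tensor>& inputs, std::vector<Tensor>* outputs) {
    // 1. Materialize every input on the CPU device.
    std::vector<Tensor> cpu_inputs;
    for (const auto& in : inputs) {
      cpu_inputs.push_back(ToCPU(in));
    }
    // 2. Run the existing, unmodified CPU operator.
    std::vector<Tensor> cpu_outputs;
    if (!CPUOp().Run(cpu_inputs, &cpu_outputs)) {
      return false;
    }
    // 3. Copy the results back to the original device.
    outputs->clear();
    for (const auto& out : cpu_outputs) {
      outputs->push_back(ToIDEEP(out));
    }
    return true;
  }
};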
@@ -11,4 +11,8 @@ if (USE_CUDA)
   target_link_libraries(caffe2_detectron_ops_gpu caffe2_gpu)
   install(TARGETS caffe2_detectron_ops_gpu DESTINATION lib)
+elseif(NOT IOS_PLATFORM)
+  add_library(caffe2_detectron_ops SHARED ${Detectron_CPU_SRCS})
+  target_link_libraries(caffe2_detectron_ops caffe2)
+  install(TARGETS caffe2_detectron_ops DESTINATION lib)
 endif()
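With the new elseif branch, non-CUDA, non-iOS builds now produce a caffe2_detectron_ops shared library; its REGISTER_* static initializers run when the library is loaded, making the Detectron ops visible to the Caffe2 operator registry. A minimal sketch of loading it explicitly via dlopen — the library file name and its presence on the loader path are assumptions:

// Minimal sketch: load the Detectron CPU op library at runtime so that its
// static registration initializers run. Build with -ldl on Linux.
#include <dlfcn.h>
#include <cstdio>

int main() {
  // Assumed name/path; adjust to where the library was installed.
  void* handle = dlopen("libcaffe2_detectron_ops.so", RTLD_NOW | RTLD_GLOBAL);
  if (!handle) {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  // Operators such as BatchPermutation and UpsampleNearest are now
  // registered with the already-loaded caffe2 runtime.
  return 0;
}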
@@ -15,9 +15,19 @@
  */
 
 #include "batch_permutation_op.h"
+#ifdef CAFFE2_USE_IDEEP
+#include <caffe2/ideep/operators/operator_fallback_ideep.h>
+#include <caffe2/ideep/utils/ideep_operator.h>
+#endif
 
 namespace caffe2 {
 
+#ifdef CAFFE2_USE_IDEEP
+REGISTER_IDEEP_OPERATOR(
+    BatchPermutation,
+    IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
+#endif
+
 REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(
     BatchPermutationGradient,
@@ -15,8 +15,17 @@
  */
 
 #include "upsample_nearest_op.h"
+#ifdef CAFFE2_USE_IDEEP
+#include "caffe2/ideep/operators/operator_fallback_ideep.h"
+#include "caffe2/ideep/utils/ideep_operator.h"
+#endif
 
 namespace caffe2 {
+#ifdef CAFFE2_USE_IDEEP
+REGISTER_IDEEP_OPERATOR(
+    UpsampleNearest,
+    IDEEPFallbackOp<UpsampleNearestOp<float, CPUContext>>);
+#endif
 
 REGISTER_CPU_OPERATOR(UpsampleNearest, UpsampleNearestOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(
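Both source files above follow the same recipe: guard the ideep headers behind CAFFE2_USE_IDEEP, then register the existing CPU operator, wrapped in IDEEPFallbackOp, under the same operator name for the IDEEP device. As a template for some further operator — MyOp is a hypothetical name, shown only to illustrate the recipe:

#ifdef CAFFE2_USE_IDEEP
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>

namespace caffe2 {

// Hypothetical registration: reuse MyOp's CPU implementation on the IDEEP
// device by wrapping it in the generic fallback operator.
REGISTER_IDEEP_OPERATOR(MyOp, IDEEPFallbackOp<MyOp<float, CPUContext>>);

} // namespace caffe2
#endif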
@@ -35,8 +35,50 @@ class UpsampleNearestOp final : public Operator<Context> {
   USE_OPERATOR_CONTEXT_FUNCTIONS;
 
   bool RunOnDevice() override {
-    // No CPU implementation for now
-    CAFFE_NOT_IMPLEMENTED;
+    auto translate_idx = [](int ii, int d1, int d2, int d3, int scale_factor) {
+      int x, y, z, w;
+      w = ii % d3;
+      ii = ii/d3;
+      z = ii % d2;
+      ii = ii/d2;
+      y = ii % d1;
+      ii = ii/d1;
+      x = ii;
+      w = w/scale_factor;
+      z = z/scale_factor;
+      d2 /= scale_factor;
+      d3 /= scale_factor;
+      return (((x*d1+y)*d2)+z)*d3+w;
+    };
+
+    auto& X = Input(0);
+    auto* Y = Output(0);
+    auto out_shape = X.dims();
+    out_shape[X.ndim() - 1] *= scale_;
+    out_shape[X.ndim() - 2] *= scale_;
+    Y->Resize(out_shape);
+
+    int d1;
+    int d2;
+    int d3;
+    if (X.ndim() == 3) {
+      d1 = Y->dim32(0);
+      d2 = Y->dim32(1);
+      d3 = Y->dim32(2);
+    } else {
+      d1 = Y->dim32(1);
+      d2 = Y->dim32(2);
+      d3 = Y->dim32(3);
+    }
+
+    const T *input_data = X.template data<T>();
+    T *output_data = Y->template mutable_data<T>();
+
+    for (int ii = 0; ii < Y->size(); ii++) {
+      int ipidx = translate_idx(ii, d1, d2, d3, scale_);
+      output_data[ii] = input_data[ipidx];
+    }
+    return true;
   }
 
  protected:
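The core of the new CPU kernel is the translate_idx lambda: it decomposes a flat output index into (x, y, z, w) coordinates, divides the two spatial coordinates (and their extents) by the scale factor, and re-flattens against the input layout, which yields nearest-neighbor (floor) sampling. A standalone check of the same arithmetic, with no Caffe2 dependency, on a 1x1x2x2 input with scale 2:

// Standalone check of the flat-index translation used by the CPU
// UpsampleNearest kernel above (translate_idx reproduced as a function).
#include <cstdio>

int translate_idx(int ii, int d1, int d2, int d3, int scale_factor) {
  int x, y, z, w;
  w = ii % d3; ii /= d3;   // output column
  z = ii % d2; ii /= d2;   // output row
  y = ii % d1; ii /= d1;   // channel
  x = ii;                  // batch
  w /= scale_factor;       // map spatial coords back to the input grid
  z /= scale_factor;
  d2 /= scale_factor;      // shrink spatial extents to input sizes
  d3 /= scale_factor;
  return (((x * d1 + y) * d2) + z) * d3 + w;
}

int main() {
  // 1x1x2x2 input, scale 2 -> 1x1x4x4 output: d1 = 1, d2 = d3 = 4.
  const float input[4] = {1.f, 2.f, 3.f, 4.f}; // [[1, 2], [3, 4]]
  float output[16];
  for (int ii = 0; ii < 16; ii++) {
    output[ii] = input[translate_idx(ii, 1, 4, 4, 2)];
  }
  // Prints each source value as a 2x2 block:
  // 1 1 2 2 / 1 1 2 2 / 3 3 4 4 / 3 3 4 4
  for (int r = 0; r < 4; r++) {
    for (int c = 0; c < 4; c++) std::printf("%g ", output[r * 4 + c]);
    std::printf("\n");
  }
  return 0;
}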