Enable Detectron model inference for CPU and MKL-DNN paths (#10157)

Summary:
1. Support ops needed for inference of Faster-RCNN/Mask-RCNN in Detectron, mostly via direct fallbacks.
2. Use CPU device to hold 0-dim tensors and integer tensors in both fallback op and blob feeder, needed by Detectron models.
3. Ignore 0-dim tensor in MKL-DNN concat operator.
4. Generate dynamic library of Detectron module for CPU device.

This PR obsoletes #9164.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10157

Differential Revision: D9276837

Pulled By: yinghai

fbshipit-source-id: dc364932ae4a2e7fcefdee70b5fce3c0cee91b6f
This commit is contained in:
jgong5
2018-08-29 14:56:55 -07:00
committed by Facebook Github Bot
parent 89834dfe64
commit c755616e00
9 changed files with 299 additions and 93 deletions

View File

@ -11,4 +11,8 @@ if (USE_CUDA)
target_link_libraries(caffe2_detectron_ops_gpu caffe2_gpu)
install(TARGETS caffe2_detectron_ops_gpu DESTINATION lib)
elseif(NOT IOS_PLATFORM)
add_library(caffe2_detectron_ops SHARED ${Detectron_CPU_SRCS})
target_link_libraries(caffe2_detectron_ops caffe2)
install(TARGETS caffe2_detectron_ops DESTINATION lib)
endif()

View File

@ -15,9 +15,19 @@
*/
#include "batch_permutation_op.h"
#ifdef CAFFE2_USE_IDEEP
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif
namespace caffe2 {
#ifdef CAFFE2_USE_IDEEP
REGISTER_IDEEP_OPERATOR(
BatchPermutation,
IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
#endif
REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
BatchPermutationGradient,

View File

@ -15,8 +15,17 @@
*/
#include "upsample_nearest_op.h"
#ifdef CAFFE2_USE_IDEEP
#include "caffe2/ideep/operators/operator_fallback_ideep.h"
#include "caffe2/ideep/utils/ideep_operator.h"
#endif
namespace caffe2 {
#ifdef CAFFE2_USE_IDEEP
REGISTER_IDEEP_OPERATOR(
UpsampleNearest,
IDEEPFallbackOp<UpsampleNearestOp<float, CPUContext>>);
#endif
REGISTER_CPU_OPERATOR(UpsampleNearest, UpsampleNearestOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(

View File

@ -35,8 +35,50 @@ class UpsampleNearestOp final : public Operator<Context> {
USE_OPERATOR_CONTEXT_FUNCTIONS;
bool RunOnDevice() override {
  // CPU nearest-neighbor upsampling: the last two dimensions of the input
  // are enlarged by the integer factor scale_, and each output element is a
  // copy of the nearest input element.
  //
  // BUG FIX: the stale "No CPU implementation for now" stub
  // (CAFFE_NOT_IMPLEMENTED) was left at the top of this body, which would
  // abort every call and make the implementation below unreachable; it has
  // been removed.

  // Map a flat index into the output tensor (whose trailing dims are
  // d1 x d2 x d3) to the flat index of its source element in the input, by
  // floor-dividing the two innermost coordinates by scale_factor.
  auto translate_idx = [](int ii, int d1, int d2, int d3, int scale_factor) {
    int x, y, z, w;
    w = ii % d3;
    ii = ii / d3;
    z = ii % d2;
    ii = ii / d2;
    y = ii % d1;
    ii = ii / d1;
    x = ii;
    // Nearest-neighbor: collapse output coordinates back onto the input grid.
    w = w / scale_factor;
    z = z / scale_factor;
    d2 /= scale_factor;
    d3 /= scale_factor;
    return (((x * d1 + y) * d2) + z) * d3 + w;
  };

  auto& X = Input(0);
  auto* Y = Output(0);
  // Output shape: identical to the input except the last two dims are scaled.
  // NOTE(review): assumes X.ndim() >= 2 — confirm callers guarantee this.
  auto out_shape = X.dims();
  out_shape[X.ndim() - 1] *= scale_;
  out_shape[X.ndim() - 2] *= scale_;
  Y->Resize(out_shape);

  // d1/d2/d3 are the trailing three output dims. For a 4-D (e.g. NCHW)
  // input the leading batch dim is skipped here; translate_idx still folds
  // it in correctly because x absorbs everything above d1.
  int d1;
  int d2;
  int d3;
  if (X.ndim() == 3) {
    d1 = Y->dim32(0);
    d2 = Y->dim32(1);
    d3 = Y->dim32(2);
  } else {
    d1 = Y->dim32(1);
    d2 = Y->dim32(2);
    d3 = Y->dim32(3);
  }

  const T* input_data = X.template data<T>();
  T* output_data = Y->template mutable_data<T>();
  // Gather: each output element reads exactly one input element.
  for (int ii = 0; ii < Y->size(); ii++) {
    output_data[ii] = input_data[translate_idx(ii, d1, d2, d3, scale_)];
  }
  return true;
}
protected: