Windows shared build (#13550)

Summary: Hi guys, I'd like to build Caffe2 with more supported options on Windows with Microsoft Visual Studio. This is the first pull request. Running scripts/build_windows_shared.bat builds Caffe2 with both CMAKE_BUILD_TYPE=Debug and CMAKE_BUILD_TYPE=Release using the Visual Studio 14 2015 generator. CUDA is 9.0, cuDNN is 7.0.5, and glog, gflags and lmdb are supported on my system. Python is 3.5, and Detectron works from the Python interface as well. It was even possible to debug Detectron code and step into caffe2_gpu.dll with the PDBs built. Disappointingly, the c10/experimental ops don't build with this Visual Studio generator, so I added a special option, INCLUDE_EXPERIMENTAL_C10_OPS (default ON), to deal with that in build_windows_shared.bat (the CMake option this change adds to CMakeLists.txt is named BUILD_C10_EXPERIMENTAL_OPS). After this pull request, the next step is to add Visual Studio 2017 support to the script. Pull Request resolved: https://github.com/pytorch/pytorch/pull/13550 Reviewed By: ezyang Differential Revision: D13042597 Pulled By: orionr fbshipit-source-id: f313f909f599cd582a1d000eff766eef3a9fc4fc
2025-10-20 12:54:11 +08:00 · 2018-11-16 12:06:21 -08:00
parent 2c21de2007
commit 8e91da4cb3
73 changed files with 537 additions and 497 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -65,6 +65,7 @@ option(BUILD_DOCS "Build Caffe2 documentation" OFF)
 option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON)
 option(BUILD_PYTHON "Build Python binaries" ON)
 option(BUILD_CAFFE2_OPS "Build Caffe2 operators" ON)
+option(BUILD_C10_EXPERIMENTAL_OPS "Build c10 experimental operators" ON)
 option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON)
 cmake_dependent_option(
    CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON
--- a/aten/src/ATen/core/TensorTypeId.cpp
+++ b/aten/src/ATen/core/TensorTypeId.cpp
@ -1,10 +0,0 @@
-#include "ATen/core/TensorTypeId.h"
-#include "caffe2/utils/string_utils.h"
-
-namespace at {
-
-std::ostream& operator<<(std::ostream& str, at::TensorTypeId rhs) {
-  return str << caffe2::to_string(rhs.underlyingId());
-}
-
-} // namespace at
--- a/aten/src/ATen/core/TensorTypeId.h
+++ b/aten/src/ATen/core/TensorTypeId.h
@ -1,40 +1,2 @@
 #pragma once
-
-#include <iostream>
-#include <string>
-#include "c10/util/IdWrapper.h"
-#include "c10/macros/Macros.h"
-
-namespace at {
-
-namespace details {
-using _tensorTypeId_underlyingType = uint8_t;
-}
-
-/**
- * Dynamic type ID of a Tensor argument.  It represents something like
- * CPUTensor, etc.
- */
-class CAFFE2_API TensorTypeId final
-    : public at::
-          IdWrapper<TensorTypeId, details::_tensorTypeId_underlyingType> {
- public:
-  // Don't use this!
-  // Unfortunately, a default constructor needs to be defined because of
-  // https://reviews.llvm.org/D41223
-  constexpr TensorTypeId() noexcept : IdWrapper(0) {}
-
- private:
-  constexpr explicit TensorTypeId(
-      details::_tensorTypeId_underlyingType id) noexcept
-      : IdWrapper(id) {}
-
-  friend class TensorTypeIdCreator;
-  friend CAFFE2_API std::ostream& operator<<(std::ostream&, TensorTypeId);
-};
-
-CAFFE2_API std::ostream& operator<<(std::ostream&, at::TensorTypeId);
-
-} // namespace at
-
-C10_DEFINE_HASH_FOR_IDWRAPPER(at::TensorTypeId)
+#include <c10/util/TensorTypeId.h>
--- a/aten/src/ATen/core/TensorTypeIdRegistration.h
+++ b/aten/src/ATen/core/TensorTypeIdRegistration.h
@ -1,109 +1,2 @@
 #pragma once
-
-/**
- * To register your own tensor types, do in a header file:
- *   AT_DECLARE_TENSOR_TYPE(MY_TENSOR)
- * and in one (!) cpp file:
- *   AT_DEFINE_TENSOR_TYPE(MY_TENSOR)
- * Both must be in the same namespace.
- */
-
-#include "ATen/core/TensorTypeId.h"
-#include "c10/macros/Macros.h"
-
-#include <atomic>
-#include <mutex>
-#include <unordered_set>
-
-namespace at {
-
-class CAFFE2_API TensorTypeIdCreator final {
- public:
-  TensorTypeIdCreator();
-
-  at::TensorTypeId create();
-
-  static constexpr at::TensorTypeId undefined() noexcept {
-    return TensorTypeId(0);
-  }
-
- private:
-  std::atomic<details::_tensorTypeId_underlyingType> last_id_;
-
-  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator);
-};
-
-class CAFFE2_API TensorTypeIdRegistry final {
- public:
-  TensorTypeIdRegistry();
-
-  void registerId(at::TensorTypeId id);
-  void deregisterId(at::TensorTypeId id);
-
- private:
-  std::unordered_set<at::TensorTypeId> registeredTypeIds_;
-  std::mutex mutex_;
-
-  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry);
-};
-
-class CAFFE2_API TensorTypeIds final {
- public:
-  static TensorTypeIds& singleton();
-
-  at::TensorTypeId createAndRegister();
-  void deregister(at::TensorTypeId id);
-
-  static constexpr at::TensorTypeId undefined() noexcept;
-
- private:
-  TensorTypeIds();
-
-  TensorTypeIdCreator creator_;
-  TensorTypeIdRegistry registry_;
-
-  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIds);
-};
-
-inline constexpr at::TensorTypeId TensorTypeIds::undefined() noexcept {
-  return TensorTypeIdCreator::undefined();
-}
-
-class CAFFE2_API TensorTypeIdRegistrar final {
- public:
-  TensorTypeIdRegistrar();
-  ~TensorTypeIdRegistrar();
-
-  at::TensorTypeId id() const noexcept;
-
- private:
-  at::TensorTypeId id_;
-
-  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistrar);
-};
-
-inline at::TensorTypeId TensorTypeIdRegistrar::id() const noexcept {
-  return id_;
-}
-
-#define AT_DECLARE_TENSOR_TYPE(TensorName) \
-  CAFFE2_API at::TensorTypeId TensorName()
-
-#define AT_DEFINE_TENSOR_TYPE(TensorName)           \
-  at::TensorTypeId TensorName() {                   \
-    static TensorTypeIdRegistrar registration_raii; \
-    return registration_raii.id();                  \
-  }
-
-AT_DECLARE_TENSOR_TYPE(UndefinedTensorId);
-AT_DECLARE_TENSOR_TYPE(CPUTensorId); // PyTorch/Caffe2 supported
-AT_DECLARE_TENSOR_TYPE(CUDATensorId); // PyTorch/Caffe2 supported
-AT_DECLARE_TENSOR_TYPE(SparseCPUTensorId); // PyTorch only
-AT_DECLARE_TENSOR_TYPE(SparseCUDATensorId); // PyTorch only
-AT_DECLARE_TENSOR_TYPE(MKLDNNTensorId); // Caffe2 only
-AT_DECLARE_TENSOR_TYPE(OpenGLTensorId); // Caffe2 only
-AT_DECLARE_TENSOR_TYPE(OpenCLTensorId); // Caffe2 only
-AT_DECLARE_TENSOR_TYPE(IDEEPTensorId); // Caffe2 only
-AT_DECLARE_TENSOR_TYPE(HIPTensorId); // Caffe2 only
-
-} // namespace at
+#include "c10/util/TensorTypeIdRegistration.h"
--- a/aten/src/THC/THCAllocator.cpp
+++ b/aten/src/THC/THCAllocator.cpp
@ -19,3 +19,6 @@ at::DataPtr THCIpcDeleter::makeDataPtr(void* data, int device) {
  auto* context = new THCIpcDeleter(data, device);
  return {data, context, &deleteTHCIpcDeleter, at::Device(at::DeviceType::CUDA, cur_device)};
 }
+
+THCIpcDeleter::THCIpcDeleter(void* data, int device)
+    : data_(data), device_(device) {}
--- a/aten/src/THC/THCAllocator.h
+++ b/aten/src/THC/THCAllocator.h
@ -8,7 +8,7 @@
 #ifdef __cplusplus
 class CAFFE2_API THCIpcDeleter {
 public:
-  THCIpcDeleter(void* data, int device) : data_(data), device_(device) {};
+  THCIpcDeleter(void* data, int device);
  ~THCIpcDeleter();
  static at::DataPtr makeDataPtr(void* data, int device);
 private:
--- a/binaries/benchmark_helper.cc
+++ b/binaries/benchmark_helper.cc
@ -141,7 +141,7 @@ void loadInput(
        vector<string> input_dims_str = caffe2::split(',', input_dims_list[i]);
        vector<int> input_dims;
        for (const string& s : input_dims_str) {
-          input_dims.push_back(caffe2::stoi(s));
+          input_dims.push_back(c10::stoi(s));
        }
        caffe2::Blob* blob = workspace->GetBlob(input_names[i]);
        if (blob == nullptr) {
--- a/binaries/convert_image_to_tensor.cc
+++ b/binaries/convert_image_to_tensor.cc
@ -99,9 +99,9 @@ std::vector<float> convertToVector(cv::Mat& img) {
    } else if (step == "normalize") {
      normalize = {255, 255, 255};
    } else if (step == "mean") {
-      mean = {0.406, 0.456, 0.485};
+      mean = {0.406f, 0.456f, 0.485f};
    } else if (step == "std") {
-      std = {0.225, 0.224, 0.229};
+      std = {0.225f, 0.224f, 0.229f};
    } else if (step == "bgrtorgb") {
      bgrtorgb = true;
    } else {
@ -143,9 +143,14 @@ std::vector<float> convertOneImage(std::string& filename) {
  assert(filename[0] != '~');

  std::cout << "Converting " << filename << std::endl;
+
  // Load image
  cv::Mat img = cv::imread(
+#if CV_MAJOR_VERSION <= 3
      filename, FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
+#else
+      filename, FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
+#endif

  cv::Mat crop = cropToSquare(img);

--- a/binaries/speed_benchmark.cc
+++ b/binaries/speed_benchmark.cc
@ -127,7 +127,7 @@ int main(int argc, char** argv) {
        vector<string> input_dims_str = caffe2::split(',', input_dims_list[i]);
        vector<int> input_dims;
        for (const string& s : input_dims_str) {
-          input_dims.push_back(caffe2::stoi(s));
+          input_dims.push_back(c10::stoi(s));
        }
        caffe2::Blob* blob = workspace->GetBlob(input_names[i]);
        if (blob == nullptr) {
--- a/c10/util/IdWrapper.h
+++ b/c10/util/IdWrapper.h
@ -23,7 +23,7 @@ namespace c10 {
 * for you, given the underlying type supports it.
 */
 template <class ConcreteType, class UnderlyingType>
-class CAFFE2_API IdWrapper {
+class C10_API IdWrapper {
 public:
  using underlying_type = UnderlyingType;
  using concrete_type = ConcreteType;
--- a/c10/util/SmallVector.h
+++ b/c10/util/SmallVector.h
@ -53,7 +53,7 @@ static inline uint64_t NextPowerOf2(uint64_t A) {
 } // namespace detail

 /// This is all the non-templated stuff common to all SmallVectors.
-class CAFFE2_API SmallVectorBase {
+class C10_API SmallVectorBase {
 protected:
  void *BeginX, *EndX, *CapacityX;

--- a/c10/util/StringUtil.h
+++ b/c10/util/StringUtil.h
@ -2,6 +2,7 @@
 #define C10_UTIL_STRINGUTIL_H_

 #include <c10/macros/Macros.h>
+#include <c10/util/string_utils.h>

 #include <cstddef>
 #include <ostream>
@ -73,28 +74,6 @@ struct C10_API SourceLocation {

 std::ostream& operator<<(std::ostream& out, const SourceLocation& loc);

-/// Portable implementation of std::stoi, which works for Android builds.
-///
-/// TODO: You won't be able to call this unqualified, because ADL means that it
-/// will be ambiguous with std::stoi.  Maybe we should fix this by giving
-/// our version a different name.
-inline int stoi(const std::string& str) {
-#if defined(__ANDROID__)
-  std::stringstream ss;
-  int n = 0;
-  ss << str;
-  ss >> n;
-  return n;
-#else
-  return std::stoi(str);
-#endif // defined(__ANDROID__)
-}
-
 } // namespace c10

-// TODO: Remove me when namespace unification occurs
-namespace at {
-using c10::stoi;
-}
-
 #endif // C10_UTIL_STRINGUTIL_H_
--- a/c10/util/TensorTypeId.cpp
+++ b/c10/util/TensorTypeId.cpp
@ -0,0 +1,10 @@
+#include "c10/util/TensorTypeId.h"
+#include "c10/util/string_utils.h"
+
+namespace c10 {
+
+std::ostream& operator<<(std::ostream& str, c10::TensorTypeId rhs) {
+  return str << c10::to_string(rhs.underlyingId());
+}
+
+} // namespace c10
--- a/c10/util/TensorTypeId.h
+++ b/c10/util/TensorTypeId.h
@ -0,0 +1,43 @@
+#ifndef TENSOR_TYPE_ID_H_
+#define TENSOR_TYPE_ID_H_
+
+#include <iostream>
+#include <string>
+#include "c10/macros/Macros.h"
+#include "c10/util/IdWrapper.h"
+
+namespace c10 {
+
+namespace details {
+using _tensorTypeId_underlyingType = uint8_t;
+}
+
+/**
+ * Dynamic type ID of a Tensor argument.  It represents something like
+ * CPUTensor, etc.
+ */
+class C10_API TensorTypeId final
+    : public at::
+          IdWrapper<TensorTypeId, details::_tensorTypeId_underlyingType> {
+ public:
+  // Don't use this!
+  // Unfortunately, a default constructor needs to be defined because of
+  // https://reviews.llvm.org/D41223
+  constexpr TensorTypeId() noexcept : IdWrapper(0) {}
+
+ private:
+  constexpr explicit TensorTypeId(
+      details::_tensorTypeId_underlyingType id) noexcept
+      : IdWrapper(id) {}
+
+  friend class TensorTypeIdCreator;
+  friend C10_API std::ostream& operator<<(std::ostream&, TensorTypeId);
+};
+
+C10_API std::ostream& operator<<(std::ostream&, c10::TensorTypeId);
+
+} // namespace c10
+
+C10_DEFINE_HASH_FOR_IDWRAPPER(c10::TensorTypeId)
+
+#endif // TENSOR_TYPE_ID_H_
--- a/aten/src/ATen/core/TensorTypeIdRegistration.cpp
+++ b/aten/src/ATen/core/TensorTypeIdRegistration.cpp
@ -1,8 +1,8 @@
-#include <ATen/core/TensorTypeIdRegistration.h>
+#include <c10/util/TensorTypeIdRegistration.h>
 #include <c10/util/C++17.h>
 #include <c10/util/Exception.h>

-namespace at {
+namespace c10 {

 TensorTypeIds::TensorTypeIds() : creator_(), registry_() {}

@ -13,8 +13,7 @@ TensorTypeIds& TensorTypeIds::singleton() {

 TensorTypeIdCreator::TensorTypeIdCreator() : last_id_(0) {}

-at::TensorTypeId TensorTypeIdCreator::create() {
-
+c10::TensorTypeId TensorTypeIdCreator::create() {
  auto id = TensorTypeId(++last_id_);

  if (last_id_ == 0) { // overflow happened!
@ -31,23 +30,23 @@ at::TensorTypeId TensorTypeIdCreator::create() {

 TensorTypeIdRegistry::TensorTypeIdRegistry() : registeredTypeIds_(), mutex_() {}

-void TensorTypeIdRegistry::registerId(at::TensorTypeId id) {
+void TensorTypeIdRegistry::registerId(c10::TensorTypeId id) {
  std::lock_guard<std::mutex> lock(mutex_);
  registeredTypeIds_.emplace(id);
 }

-void TensorTypeIdRegistry::deregisterId(at::TensorTypeId id) {
+void TensorTypeIdRegistry::deregisterId(c10::TensorTypeId id) {
  std::lock_guard<std::mutex> lock(mutex_);
  registeredTypeIds_.erase(id);
 }

-at::TensorTypeId TensorTypeIds::createAndRegister() {
-  at::TensorTypeId id = creator_.create();
+c10::TensorTypeId TensorTypeIds::createAndRegister() {
+  c10::TensorTypeId id = creator_.create();
  registry_.registerId(id);
  return id;
 }

-void TensorTypeIds::deregister(at::TensorTypeId id) {
+void TensorTypeIds::deregister(c10::TensorTypeId id) {
  registry_.deregisterId(id);
 }

@ -58,15 +57,15 @@ TensorTypeIdRegistrar::~TensorTypeIdRegistrar() {
  TensorTypeIds::singleton().deregister(id_);
 }

-AT_DEFINE_TENSOR_TYPE(UndefinedTensorId);
-AT_DEFINE_TENSOR_TYPE(CPUTensorId);
-AT_DEFINE_TENSOR_TYPE(CUDATensorId);
-AT_DEFINE_TENSOR_TYPE(SparseCPUTensorId);
-AT_DEFINE_TENSOR_TYPE(SparseCUDATensorId);
-AT_DEFINE_TENSOR_TYPE(MKLDNNTensorId); // Caffe2 only
-AT_DEFINE_TENSOR_TYPE(OpenGLTensorId); // Caffe2 only
-AT_DEFINE_TENSOR_TYPE(OpenCLTensorId); // Caffe2 only
-AT_DEFINE_TENSOR_TYPE(IDEEPTensorId); // Caffe2 only
-AT_DEFINE_TENSOR_TYPE(HIPTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(UndefinedTensorId);
+C10_DEFINE_TENSOR_TYPE(CPUTensorId);
+C10_DEFINE_TENSOR_TYPE(CUDATensorId);
+C10_DEFINE_TENSOR_TYPE(SparseCPUTensorId);
+C10_DEFINE_TENSOR_TYPE(SparseCUDATensorId);
+C10_DEFINE_TENSOR_TYPE(MKLDNNTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(OpenGLTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(OpenCLTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(IDEEPTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(HIPTensorId); // Caffe2 only

-} // namespace at
+} // namespace c10
--- a/c10/util/TensorTypeIdRegistration.h
+++ b/c10/util/TensorTypeIdRegistration.h
@ -0,0 +1,112 @@
+#ifndef TENSOR_TYPE_ID_REGISTRATION_H_
+#define TENSOR_TYPE_ID_REGISTRATION_H_
+
+/**
+ * To register your own tensor type, put this in a header file:
+ *   C10_DECLARE_TENSOR_TYPE(MY_TENSOR)
+ * and this in exactly one (!) cpp file:
+ *   C10_DEFINE_TENSOR_TYPE(MY_TENSOR)
+ * Both must be in the same namespace.
+ */
+
+#include "c10/macros/Macros.h"
+#include "c10/util/TensorTypeId.h"
+
+#include <atomic>
+#include <mutex>
+#include <unordered_set>
+
+namespace c10 {
+
+class C10_API TensorTypeIdCreator final {
+ public:
+  TensorTypeIdCreator();
+
+  c10::TensorTypeId create();
+
+  static constexpr c10::TensorTypeId undefined() noexcept {
+    return c10::TensorTypeId(0);
+  }
+
+ private:
+  std::atomic<details::_tensorTypeId_underlyingType> last_id_;
+
+  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator);
+};
+
+class C10_API TensorTypeIdRegistry final {
+ public:
+  TensorTypeIdRegistry();
+
+  void registerId(c10::TensorTypeId id);
+  void deregisterId(c10::TensorTypeId id);
+
+ private:
+  std::unordered_set<c10::TensorTypeId> registeredTypeIds_;
+  std::mutex mutex_;
+
+  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry);
+};
+
+class C10_API TensorTypeIds final {
+ public:
+  static TensorTypeIds& singleton();
+
+  c10::TensorTypeId createAndRegister();
+  void deregister(c10::TensorTypeId id);
+
+  static constexpr c10::TensorTypeId undefined() noexcept;
+
+ private:
+  TensorTypeIds();
+
+  TensorTypeIdCreator creator_;
+  TensorTypeIdRegistry registry_;
+
+  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIds);
+};
+
+inline constexpr c10::TensorTypeId TensorTypeIds::undefined() noexcept {
+  return TensorTypeIdCreator::undefined();
+}
+
+class C10_API TensorTypeIdRegistrar final {
+ public:
+  TensorTypeIdRegistrar();
+  ~TensorTypeIdRegistrar();
+
+  c10::TensorTypeId id() const noexcept;
+
+ private:
+  c10::TensorTypeId id_;
+
+  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistrar);
+};
+
+inline c10::TensorTypeId TensorTypeIdRegistrar::id() const noexcept {
+  return id_;
+}
+
+#define C10_DECLARE_TENSOR_TYPE(TensorName) \
+  C10_API c10::TensorTypeId TensorName()
+
+#define C10_DEFINE_TENSOR_TYPE(TensorName)          \
+  c10::TensorTypeId TensorName() {                  \
+    static TensorTypeIdRegistrar registration_raii; \
+    return registration_raii.id();                  \
+  }
+
+C10_DECLARE_TENSOR_TYPE(UndefinedTensorId);
+C10_DECLARE_TENSOR_TYPE(CPUTensorId); // PyTorch/Caffe2 supported
+C10_DECLARE_TENSOR_TYPE(CUDATensorId); // PyTorch/Caffe2 supported
+C10_DECLARE_TENSOR_TYPE(SparseCPUTensorId); // PyTorch only
+C10_DECLARE_TENSOR_TYPE(SparseCUDATensorId); // PyTorch only
+C10_DECLARE_TENSOR_TYPE(MKLDNNTensorId); // Caffe2 only
+C10_DECLARE_TENSOR_TYPE(OpenGLTensorId); // Caffe2 only
+C10_DECLARE_TENSOR_TYPE(OpenCLTensorId); // Caffe2 only
+C10_DECLARE_TENSOR_TYPE(IDEEPTensorId); // Caffe2 only
+C10_DECLARE_TENSOR_TYPE(HIPTensorId); // Caffe2 only
+
+} // namespace c10
+
+#endif // TENSOR_TYPE_ID_REGISTRATION_H_
--- a/c10/util/string_utils.h
+++ b/c10/util/string_utils.h
@ -0,0 +1,60 @@
+#pragma once
+
+#include <sstream>
+#include <string>
+
+using std::string;
+
+namespace c10 {
+
+// Custom to_string, stoi, stoull and stod implementations for Android builds.
+// Note(jiayq): Do not use the CAFFE2_TESTONLY_FORCE_STD_STRING_TEST macro
+// outside testing code that lives under common_test.cc
+#if defined(__ANDROID__) || defined(CAFFE2_TESTONLY_FORCE_STD_STRING_TEST)
+#define CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS 1
+template <typename T>
+std::string to_string(T value) {
+  std::ostringstream os;
+  os << value;
+  return os.str();
+}
+
+inline int stoi(const string& str) {
+  std::stringstream ss;
+  int n = 0;
+  ss << str;
+  ss >> n;
+  return n;
+}
+
+inline uint64_t stoull(const string& str) {
+  std::stringstream ss;
+  uint64_t n = 0;
+  ss << str;
+  ss >> n;
+  return n;
+}
+
+inline double stod(const string& str, std::size_t* pos = 0) {
+  std::stringstream ss;
+  ss << str;
+  double val = 0;
+  ss >> val;
+  if (pos) {
+    if (ss.tellg() == std::streampos(-1)) {
+      *pos = str.size();
+    } else {
+      *pos = ss.tellg();
+    }
+  }
+  return val;
+}
+#else
+#define CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS 0
+using std::stod;
+using std::stoi;
+using std::stoull;
+using std::to_string;
+#endif // defined(__ANDROID__) || defined(CAFFE2_TESTONLY_FORCE_STD_STRING_TEST)
+
+} // namespace c10
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@ -524,6 +524,7 @@ if (BUILD_PYTHON)
      caffe2_pybind11_state caffe2_library)
  if (WIN32)
    target_link_libraries(caffe2_pybind11_state ${PYTHON_LIBRARIES})
+    target_link_libraries(caffe2_pybind11_state onnx_proto)
  endif(WIN32)

  # Install caffe2_pybind11_state(_gpu|hip) in site-packages/caffe2/python,
@ -548,6 +549,7 @@ if (BUILD_PYTHON)
        caffe2_pybind11_state_gpu caffe2_library caffe2_gpu_library)
    if (WIN32)
      target_link_libraries(caffe2_pybind11_state_gpu ${PYTHON_LIBRARIES})
+      target_link_libraries(caffe2_pybind11_state_gpu onnx_proto)
    endif(WIN32)

    # Install with same rpath as non-gpu caffe2_pybind11_state
--- a/caffe2/contrib/aten/aten_op_template.h
+++ b/caffe2/contrib/aten/aten_op_template.h
@ -167,7 +167,7 @@ private:
      descriptor << "-" << a;

    std::string descriptor_sized =
-        descriptor.str() + "-" + caffe2::to_string(InputSize());
+        descriptor.str() + "-" + c10::to_string(InputSize());
    std::string descriptor_var_args = descriptor.str() + "-*";
    if (op_to_key.count(descriptor_sized) > 0) {
      return op_to_key[descriptor_sized];
--- a/caffe2/contrib/prof/htrace_async_dag_net_gpu.cc
+++ b/caffe2/contrib/prof/htrace_async_dag_net_gpu.cc
@ -39,7 +39,7 @@ class HTraceAsyncDAGNet : public AsyncDAGNet {
    htrace::Scope run_scope(
        htrace_tracer_,
        htrace_root_scope_.GetSpanId(),
-        "run-scope-" + caffe2::to_string(run_count_++));
+        "run-scope-" + c10::to_string(run_count_++));
    return AsyncDAGNet::DoRunAsync();
  }

--- a/caffe2/contrib/prof/htrace_dag_net.cc
+++ b/caffe2/contrib/prof/htrace_dag_net.cc
@ -43,7 +43,7 @@ class HTraceDAGNet : public DAGNetBase {
    htrace::Scope run_scope(
        htrace_tracer_,
        htrace_root_scope_.GetSpanId(),
-        "run-scope-" + caffe2::to_string(run_count_++));
+        "run-scope-" + c10::to_string(run_count_++));
    return DAGNetBase::DoRunAsync();
  }

@ -64,8 +64,7 @@ class HTraceDAGNet : public DAGNetBase {
      htrace::Scope operator_scope(
          htrace_tracer_,
          worker_scope->GetSpanId(),
-          "#" + caffe2::to_string(idx) + " (" + print_name + ", " + op_type +
-              ")");
+          "#" + c10::to_string(idx) + " (" + print_name + ", " + op_type + ")");
      success &= operator_nodes_[idx].operator_->Run();
    }
    return success;
--- a/caffe2/contrib/script/compiler.cc
+++ b/caffe2/contrib/script/compiler.cc
@ -216,7 +216,7 @@ struct DefCompiler {
    }
  }
  std::string fresh(std::string prefix = "$t") {
-    return std::string(prefix) + caffe2::to_string(next_fresh++);
+    return std::string(prefix) + c10::to_string(next_fresh++);
  }
  const char* operatorName(int kind, int ninputs) {
    switch (kind) {
@ -252,7 +252,7 @@ struct DefCompiler {
      case TK_NOT:
        return "Not";
      default:
-        throw std::runtime_error("unknown kind " + caffe2::to_string(kind));
+        throw std::runtime_error("unknown kind " + c10::to_string(kind));
    }
  }
  void fillArg(Argument* arg, const Attribute& attr) {
@ -598,7 +598,7 @@ struct DefCompiler {
        return TensorProto_DataType_BOOL;
      default:
        throw std::runtime_error(
-            "expected type token: " + caffe2::to_string(type));
+            "expected type token: " + c10::to_string(type));
    }
  }

--- a/caffe2/contrib/script/lexer.cc
+++ b/caffe2/contrib/script/lexer.cc
@ -14,7 +14,7 @@ std::string kindToString(int kind) {
    TC_FORALL_TOKEN_KINDS(DEFINE_CASE)
 #undef DEFINE_CASE
    default:
-      throw std::runtime_error("unknown kind: " + caffe2::to_string(kind));
+      throw std::runtime_error("unknown kind: " + c10::to_string(kind));
  }
 }

--- a/caffe2/contrib/script/lexer.h
+++ b/caffe2/contrib/script/lexer.h
@ -358,7 +358,7 @@ struct Token {
  double doubleValue() {
    assert(TK_NUMBER == kind);
    size_t idx;
-    double r = ::caffe2::stod(text(), &idx);
+    double r = ::c10::stod(text(), &idx);
    assert(idx == range.size());
    return r;
  }
--- a/caffe2/core/common.h
+++ b/caffe2/core/common.h
@ -29,6 +29,8 @@

 #include "c10/macros/Macros.h"

+#include "c10/util/string_utils.h"
+
 namespace caffe2 {

 // Note(Yangqing): NVCC does not play well with unordered_map on some platforms,
@ -125,57 +127,6 @@ make_unique(Args&&...) = delete;

 #endif

-// to_string, stoi and stod implementation for Android related stuff.
-// Note(jiayq): Do not use the CAFFE2_TESTONLY_FORCE_STD_STRING_TEST macro
-// outside testing code that lives under common_test.cc
-#if defined(__ANDROID__) || defined(CAFFE2_TESTONLY_FORCE_STD_STRING_TEST)
-#define CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS 1
-template <typename T>
-std::string to_string(T value)
-{
-  std::ostringstream os;
-  os << value;
-  return os.str();
-}
-
-inline int stoi(const string& str) {
-  std::stringstream ss;
-  int n = 0;
-  ss << str;
-  ss >> n;
-  return n;
-}
-
-inline uint64_t stoull(const string& str) {
-  std::stringstream ss;
-  uint64_t n = 0;
-  ss << str;
-  ss >> n;
-  return n;
-}
-
-inline double stod(const string& str, std::size_t* pos = 0) {
-  std::stringstream ss;
-  ss << str;
-  double val = 0;
-  ss >> val;
-  if (pos) {
-    if (ss.tellg() == std::streampos(-1)) {
-      *pos = str.size();
-    } else {
-      *pos = ss.tellg();
-    }
-  }
-  return val;
-}
-#else
-#define CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS 0
-using std::to_string;
-using std::stoi;
-using std::stoull;
-using std::stod;
-#endif // defined(__ANDROID__) || defined(CAFFE2_FORCE_STD_STRING_FALLBACK_TEST)
-
 #if defined(__ANDROID__) && !defined(__NDK_MAJOR__)
 using ::round;
 #else
@ -238,6 +189,6 @@ CAFFE2_API void SetHipRuntimeFlag();
 // CMake)
 CAFFE2_API const std::map<string, string>& GetBuildOptions();

-}  // namespace caffe2
+} // namespace caffe2

 #endif  // CAFFE2_CORE_COMMON_H_
--- a/caffe2/core/common_test.cc
+++ b/caffe2/core/common_test.cc
@ -17,7 +17,7 @@ TEST(CommonTest, TestStoi) {
  EXPECT_TRUE(CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS);
  string s = "1234";
  int i_std = std::stoi(s);
-  int i_caffe2 = ::caffe2::stoi(s);
+  int i_caffe2 = ::c10::stoi(s);
  EXPECT_EQ(i_std, i_caffe2);
 }

@ -26,14 +26,14 @@ TEST(CommonTest, TestStod) {
  string s = "1.234";
  std::size_t p_std = 0, p_caffe2 = 0;
  double d_std = std::stod(s, &p_std);
-  double d_caffe2 = ::caffe2::stod(s, &p_caffe2);
+  double d_caffe2 = ::c10::stod(s, &p_caffe2);
  EXPECT_EQ(d_std, d_caffe2);
  EXPECT_EQ(p_std, p_caffe2);

  // Only part of the string is parsed.
  s = "1.234 5.678";
  d_std = std::stod(s, &p_std);
-  d_caffe2 = ::caffe2::stod(s, &p_caffe2);
+  d_caffe2 = ::c10::stod(s, &p_caffe2);
  EXPECT_EQ(d_std, d_caffe2);
  EXPECT_EQ(p_std, p_caffe2);
 }
--- a/caffe2/core/memonger.cc
+++ b/caffe2/core/memonger.cc
@ -67,7 +67,7 @@ NetDef optimize_inference_net(

          // Safety check to prevent double-memongering nets.
          string shared_blob =
-              "__m" + caffe2::to_string(renaming.size()) + "_shared";
+              "__m" + c10::to_string(renaming.size()) + "_shared";
          if (all_blobs.find(shared_blob) != all_blobs.end()) {
            LOG(INFO) << "Net was already memongered!";
            return net;
@ -211,7 +211,7 @@ class ComputeBlobRecyclingForDag {
        if (renamed.find(mapped_blob.second) == renamed.end()) {
          renamed.insert(
              {mapped_blob.second,
-               namescope + "__m" + caffe2::to_string(name_idx++) + "_shared"});
+               namescope + "__m" + c10::to_string(name_idx++) + "_shared"});
        }
      } else {
        renamed.insert({mapped_blob.second, mapped_blob.second});
--- a/caffe2/core/net_async_base.cc
+++ b/caffe2/core/net_async_base.cc
@ -182,10 +182,10 @@ TaskThreadPoolBase* AsyncNetBase::pool(const DeviceOption& device_option) {
    auto gpu_id = device_option.device_id();
    CAFFE_ENFORCE(
        gpu_id >= 0 && gpu_id < FLAGS_caffe2_net_async_max_gpus,
-        "Invalid GPU id: " + caffe2::to_string(gpu_id));
+        "Invalid GPU id: " + c10::to_string(gpu_id));
    return poolGetter(gpu_pools_, device_type, gpu_id, num_workers_);
  } else {
-    CAFFE_THROW("Unsupported device type " + caffe2::to_string(device_type));
+    CAFFE_THROW("Unsupported device type " + c10::to_string(device_type));
  }
 }

@ -194,7 +194,7 @@ int AsyncNetBase::stream(int task_id) {
  int stream_id = 0;
  if (IsGPUDeviceType(device_option.device_type())) {
    int gpu_id = device_option.device_id();
-    CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
+    CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + c10::to_string(gpu_id));
    if ((unsigned)gpu_id >= getStreamCounters().size()) {
      getStreamCounters().resize(gpu_id + 1, 0);
    }
--- a/caffe2/core/net_async_dag_gpu.cc
+++ b/caffe2/core/net_async_dag_gpu.cc
@ -111,7 +111,7 @@ int AsyncDAGNet::stream(const DeviceOption& device_option) {
  int stream_id = 0;
  if (device_option.device_type() == PROTO_CUDA) {
    int gpu_id = device_option.device_id();
-    CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
+    CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + c10::to_string(gpu_id));
    if ((unsigned)gpu_id >= stream_counters_.size()) {
      stream_counters_.resize(gpu_id + 1, 0);
    }
--- a/caffe2/core/net_async_tracing.cc
+++ b/caffe2/core/net_async_tracing.cc
@ -64,7 +64,7 @@ Tracer::Tracer(
      config_(config) {
  std::replace(filename_.begin(), filename_.end(), '/', '_');
  filename_ = this->config().filepath + "/" + filename_ + "_id_" +
-      caffe2::to_string(getCounterForNetName(net_name));
+      c10::to_string(getCounterForNetName(net_name));
  timer_.Start();
 }

@ -81,7 +81,7 @@ std::string Tracer::opTraceName(const OperatorBase* op) {
  int unique_shard_id =
      op->has_debug_def() ? getUniqueShardId(op->debug_def()) : -1;
  if (unique_shard_id != -1) {
-    return op->type() + ":" + caffe2::to_string(unique_shard_id);
+    return op->type() + ":" + c10::to_string(unique_shard_id);
  } else {
    return op->type();
  }
@ -366,7 +366,7 @@ int extractShardId(const std::string& name) {
    while (right_pos < name.length() && isdigit(name[right_pos])) {
      right_pos++;
    }
-    return caffe2::stoi(name.substr(left_pos, right_pos - left_pos));
+    return c10::stoi(name.substr(left_pos, right_pos - left_pos));
  } else {
    return -1;
  }
@ -463,7 +463,7 @@ bool startIter(const std::shared_ptr<Tracer>& tracer) {
  tracer->setEnabled(is_enabled);
  if (should_dump) {
    int dumping_iter = tracer->bumpDumpingIter();
-    tracer->dumpTracingResultAndClearEvents(caffe2::to_string(dumping_iter));
+    tracer->dumpTracingResultAndClearEvents(c10::to_string(dumping_iter));
  }
  return is_enabled;
 }
--- a/caffe2/core/numa.cc
+++ b/caffe2/core/numa.cc
@ -26,7 +26,7 @@ void NUMABind(int numa_node_id) {

  CAFFE_ENFORCE(
      numa_node_id <= numa_max_node(),
-      "NUMA node id " + caffe2::to_string(numa_node_id) + " is unavailable");
+      "NUMA node id " + c10::to_string(numa_node_id) + " is unavailable");

  auto bm = numa_allocate_nodemask();
  numa_bitmask_clearall(bm);
--- a/caffe2/image/image_input_op.cc
+++ b/caffe2/image/image_input_op.cc
@ -2,6 +2,13 @@

 namespace caffe2 {

+template <>
+bool ImageInputOp<CPUContext>::ApplyTransformOnGPU(
+    const std::vector<std::int64_t>&,
+    const c10::Device&) {
+  return false;
+}
+
 REGISTER_CPU_OPERATOR(ImageInput, ImageInputOp<CPUContext>);

 OPERATOR_SCHEMA(ImageInput)
--- a/caffe2/image/image_input_op.h
+++ b/caffe2/image/image_input_op.h
@ -83,6 +83,9 @@ class ImageInputOp final
  void DecodeAndTransposeOnly(
      const std::string& value, uint8_t *image_data, int item_id,
      const int channels, std::size_t thread_index);
+  bool ApplyTransformOnGPU(
+      const std::vector<std::int64_t>& dims,
+      const c10::Device& type);

  unique_ptr<db::DBReader> owned_reader_;
  const db::DBReader* reader_;
@ -1206,7 +1209,7 @@ bool ImageInputOp<Context>::Prefetch() {
      max_decode_error_ratio_) {
    throw std::runtime_error(
        "max_decode_error_ratio exceeded " +
-        caffe2::to_string(max_decode_error_ratio_));
+        c10::to_string(max_decode_error_ratio_));
  }

  // If the context is not CPUContext, we will need to do a copy in the
@ -1267,22 +1270,10 @@ bool ImageInputOp<Context>::CopyPrefetched() {
      const int N = X.dim32(0), C = X.dim32(3), H = X.dim32(1), W = X.dim32(2);
      // data goes out as NCHW
      auto dims = std::vector<int64_t>{N, C, H, W};
-      // GPU transform kernel allows explicitly setting output type
-      if (output_type_ == TensorProto_DataType_FLOAT) {
-        auto* image_output = OperatorBase::OutputTensor(
-            0, dims, at::dtype<float>().device(type));
-        TransformOnGPU<uint8_t,float,Context>(prefetched_image_on_device_,
-                                              image_output, mean_gpu_,
-                                              std_gpu_, &context_);
-      } else if (output_type_ == TensorProto_DataType_FLOAT16) {
-        auto* image_output = OperatorBase::OutputTensor(
-            0, dims, at::dtype<at::Half>().device(type));
-        TransformOnGPU<uint8_t,at::Half,Context>(prefetched_image_on_device_,
-                                                image_output, mean_gpu_,
-                                                std_gpu_, &context_);
-      }  else {
+      if (!ApplyTransformOnGPU(dims, type)) {
        return false;
      }
+
    } else {
      OperatorBase::OutputTensorCopyFrom(
          0, type, prefetched_image_on_device_, &context_);
--- a/caffe2/image/image_input_op_gpu.cc
+++ b/caffe2/image/image_input_op_gpu.cc
@ -4,6 +4,35 @@

 namespace caffe2 {

+template <>
+bool ImageInputOp<CUDAContext>::ApplyTransformOnGPU(
+    const std::vector<std::int64_t>& dims,
+    const c10::Device& type) {
+  // GPU transform kernel allows explicitly setting output type
+  if (output_type_ == TensorProto_DataType_FLOAT) {
+    auto* image_output =
+        OperatorBase::OutputTensor(0, dims, at::dtype<float>().device(type));
+    TransformOnGPU<uint8_t, float, CUDAContext>(
+        prefetched_image_on_device_,
+        image_output,
+        mean_gpu_,
+        std_gpu_,
+        &context_);
+  } else if (output_type_ == TensorProto_DataType_FLOAT16) {
+    auto* image_output =
+        OperatorBase::OutputTensor(0, dims, at::dtype<at::Half>().device(type));
+    TransformOnGPU<uint8_t, at::Half, CUDAContext>(
+        prefetched_image_on_device_,
+        image_output,
+        mean_gpu_,
+        std_gpu_,
+        &context_);
+  } else {
+    return false;
+  }
+  return true;
+}
+
 REGISTER_CUDA_OPERATOR(ImageInput, ImageInputOp<CUDAContext>);

 }  // namespace caffe2
--- a/caffe2/mobile/contrib/arm-compute/test/gl_concat_op_test.cc
+++ b/caffe2/mobile/contrib/arm-compute/test/gl_concat_op_test.cc
@ -16,14 +16,18 @@ TEST(OPENGLOperatorTest, Concat) {
    int H = 8;
    int W = 8;
    for (int i = 0; i < Cs.size(); ++i) {
-      PopulateCPUBlob(&ws, true, std::string("cpu_X") + caffe2::to_string(i), {batchSize, Cs[i], H, W});
+      PopulateCPUBlob(
+          &ws,
+          true,
+          std::string("cpu_X") + c10::to_string(i),
+          {batchSize, Cs[i], H, W});
    }

  NetDef cpu_net;
  {
    OperatorDef* def = AddOp(&cpu_net, "Concat", {}, {"ref_Y", "cpu_dummy"});
      for (int i = 0; i < Cs.size(); ++i ) {
-        def->add_input(std::string("cpu_X") + caffe2::to_string(i));
+        def->add_input(std::string("cpu_X") + c10::to_string(i));
      }
  }

@ -33,7 +37,7 @@ TEST(OPENGLOperatorTest, Concat) {
    OperatorDef* def = AddOp(&gpu_net, "Concat", {}, {"gpu_Y", "gpu_dummy"});
    MAKE_OPENGL_OPERATOR(def);
    for (int i = 0; i < Cs.size(); ++i ) {
-      def->add_input(std::string("cpu_X") + caffe2::to_string(i));
+      def->add_input(std::string("cpu_X") + c10::to_string(i));
    }
  }

--- a/caffe2/mobile/contrib/opengl/operators/GLConcat.cc
+++ b/caffe2/mobile/contrib/opengl/operators/GLConcat.cc
@ -19,18 +19,24 @@ class GLConcat : public GLFilter {
  binding* input_tile_x;

  GLConcat(tile_descriptor output_tile_geometries, bool tiling = false)
-      : GLFilter("GLConcat",
-                 vertex_shader,
-                 fragment_shader,
-                 std::vector<binding*>(
-                     {BINDING(outputSize), BINDING(inputData), BINDING(inputTileRange), BINDING(input_tile_x)}),
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"TILING", caffe2::to_string(tiling)},
-                  {"OUTPUT_TILES", caffe2::to_string(output_tile_geometries.tiles)},
-                  {"OUTPUT_TILE_X", caffe2::to_string(output_tile_geometries.tile_dims.x)},
-                  {"OUTPUT_TILE_WIDTH", caffe2::to_string(output_tile_geometries.tile_size.x)},
-                  {"OUTPUT_TILE_HEIGHT", caffe2::to_string(output_tile_geometries.tile_size.y)}}),
+      : GLFilter(
+            "GLConcat",
+            vertex_shader,
+            fragment_shader,
+            std::vector<binding*>({BINDING(outputSize),
+                                   BINDING(inputData),
+                                   BINDING(inputTileRange),
+                                   BINDING(input_tile_x)}),
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"TILING", c10::to_string(tiling)},
+             {"OUTPUT_TILES", c10::to_string(output_tile_geometries.tiles)},
+             {"OUTPUT_TILE_X",
+              c10::to_string(output_tile_geometries.tile_dims.x)},
+             {"OUTPUT_TILE_WIDTH",
+              c10::to_string(output_tile_geometries.tile_size.x)},
+             {"OUTPUT_TILE_HEIGHT",
+              c10::to_string(output_tile_geometries.tile_size.y)}}),
        tiling_(tiling) {}

  template <typename T>
--- a/caffe2/mobile/contrib/opengl/operators/GLConvolution.h
+++ b/caffe2/mobile/contrib/opengl/operators/GLConvolution.h
@ -76,47 +76,42 @@ class GLConvolution : public GLFilter {
                _output_tile_batch_size,
                _prelu_scale != nullptr),
            {/* no attributes */},
-            {{"KERNEL_SIZE_X", caffe2::to_string(_geometry.kernel_size.x)},
-             {"KERNEL_SIZE_Y", caffe2::to_string(_geometry.kernel_size.y)},
-             {"INPUT_BATCH_SIZE", caffe2::to_string(_input_batch_size)},
-             {"OUTPUT_BATCH_SIZE", caffe2::to_string(_output_batch_size)},
-             {"INPUT_TILES", caffe2::to_string(_input_tiles)},
-             {"OUTPUT_TILES", caffe2::to_string(_output_tiles)},
-             {"INPUT_TILE_WIDTH",
-              caffe2::to_string(_geometry.input_tile_size.x)},
-             {"INPUT_TILE_HEIGHT",
-              caffe2::to_string(_geometry.input_tile_size.y)},
+            {{"KERNEL_SIZE_X", c10::to_string(_geometry.kernel_size.x)},
+             {"KERNEL_SIZE_Y", c10::to_string(_geometry.kernel_size.y)},
+             {"INPUT_BATCH_SIZE", c10::to_string(_input_batch_size)},
+             {"OUTPUT_BATCH_SIZE", c10::to_string(_output_batch_size)},
+             {"INPUT_TILES", c10::to_string(_input_tiles)},
+             {"OUTPUT_TILES", c10::to_string(_output_tiles)},
+             {"INPUT_TILE_WIDTH", c10::to_string(_geometry.input_tile_size.x)},
+             {"INPUT_TILE_HEIGHT", c10::to_string(_geometry.input_tile_size.y)},
             {"OUTPUT_TILE_WIDTH",
-              caffe2::to_string(_geometry.output_tile_size.x)},
+              c10::to_string(_geometry.output_tile_size.x)},
             {"OUTPUT_TILE_HEIGHT",
-              caffe2::to_string(_geometry.output_tile_size.y)},
-             {"INPUT_TILE_X",
-              caffe2::to_string(_geometry.input_tile_grid_size.x)},
+              c10::to_string(_geometry.output_tile_size.y)},
+             {"INPUT_TILE_X", c10::to_string(_geometry.input_tile_grid_size.x)},
             {"OUTPUT_TILE_X",
-              caffe2::to_string(_geometry.output_tile_grid_size.x)},
-             {"INPUT_TILE_CHUNK_SIZE",
-              caffe2::to_string(_input_tile_chunk_size)},
+              c10::to_string(_geometry.output_tile_grid_size.x)},
+             {"INPUT_TILE_CHUNK_SIZE", c10::to_string(_input_tile_chunk_size)},
             {"OUTPUT_TILE_CHUNK_SIZE",
-              caffe2::to_string(_output_tile_chunk_size)},
+              c10::to_string(_output_tile_chunk_size)},
             {"OUTPUT_TILE_BATCH_SIZE",
-              caffe2::to_string(_output_tile_batch_size)},
-             {"TILED_CONVOLUTION", caffe2::to_string(_tiling)},
+              c10::to_string(_output_tile_batch_size)},
+             {"TILED_CONVOLUTION", c10::to_string(_tiling)},
             {"INPUT_PADDING_X",
-              caffe2::to_string(
+              c10::to_string(
                  _geometry.transposed
                      ? _geometry.kernel_size.x - 1 - _geometry.input_padding.x
                      : _geometry.input_padding.x)},
             {"INPUT_PADDING_Y",
-              caffe2::to_string(
+              c10::to_string(
                  _geometry.transposed
                      ? _geometry.kernel_size.y - 1 - _geometry.input_padding.y
                      : _geometry.input_padding.y)},
-             {"INPUT_STRIDE_X", caffe2::to_string(_geometry.input_stride.x)},
-             {"INPUT_STRIDE_Y", caffe2::to_string(_geometry.input_stride.y)},
-             {"TRANSPOSED_CONVOLUTION",
-              caffe2::to_string(_geometry.transposed)},
+             {"INPUT_STRIDE_X", c10::to_string(_geometry.input_stride.x)},
+             {"INPUT_STRIDE_Y", c10::to_string(_geometry.input_stride.y)},
+             {"TRANSPOSED_CONVOLUTION", c10::to_string(_geometry.transposed)},
             {"BOUNDS_CHECK_MODE",
-              caffe2::to_string(bounds_check_mode(_tiling, _geometry))}}),
+              c10::to_string(bounds_check_mode(_tiling, _geometry))}}),
        kernel(_kernel),
        bias(_bias),
        prelu_scale(_prelu_scale),
@ -176,14 +171,13 @@ class GLConvolution : public GLFilter {

    for (int i = 0; i < input_batch_size; i++) {
      bindings.push_back(
-          inputData[i] =
-              new binding{"inputData[" + caffe2::to_string(i) + "]"});
+          inputData[i] = new binding{"inputData[" + c10::to_string(i) + "]"});
    }

    for (int i = 0; i < output_batch_size; i++) {
      bindings.push_back(
          previousData[i] =
-              new binding{"previousData[" + caffe2::to_string(i) + "]"});
+              new binding{"previousData[" + c10::to_string(i) + "]"});
    }

    return bindings;
@ -203,7 +197,7 @@ class GLConvolution : public GLFilter {
         i++) {
      bindings.push_back(
          kernel_block[i] =
-              new binding{"Kernel_block[" + caffe2::to_string(i) + "]"});
+              new binding{"Kernel_block[" + c10::to_string(i) + "]"});
    }

    return bindings;
--- a/caffe2/mobile/contrib/opengl/operators/GLInstanceNorm.cc
+++ b/caffe2/mobile/contrib/opengl/operators/GLInstanceNorm.cc
@ -35,14 +35,15 @@ class GLReduce : public GLFilter {
  }

  GLReduce(bool compute_inv_stdev_ = false, bool compute_norm_ = false)
-      : GLFilter("GLReduce",
-                 vertex_shader,
-                 fragment_shader,
-                 input_bindings(compute_norm_),
-                 {/* no uniform_blocks_bindings */},
-                 {/* no attributes */},
-                 {{"COMPUTE_INV_STDEV", caffe2::to_string((int)compute_inv_stdev_)},
-                  {"COMPUTE_NORM", caffe2::to_string((int)compute_norm_)}}),
+      : GLFilter(
+            "GLReduce",
+            vertex_shader,
+            fragment_shader,
+            input_bindings(compute_norm_),
+            {/* no uniform_blocks_bindings */},
+            {/* no attributes */},
+            {{"COMPUTE_INV_STDEV", c10::to_string((int)compute_inv_stdev_)},
+             {"COMPUTE_NORM", c10::to_string((int)compute_norm_)}}),
        compute_inv_stdev(compute_inv_stdev_),
        compute_norm(compute_norm_) {}

@ -208,18 +209,20 @@ class GLScale : public GLFilter {
    return bindings;
  }

-  GLScale(const int _channels,
-          const float* _scale,
-          const float* _bias,
-          const float* _prelu_scale = nullptr,
-          const int _prelu_size = 0)
-      : GLFilter("GLScale",
-                 vertex_shader,
-                 fragment_shader,
-                 input_bindings(_prelu_scale != nullptr),
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"FUSE_PRELU", caffe2::to_string(_prelu_scale != nullptr)}}),
+  GLScale(
+      const int _channels,
+      const float* _scale,
+      const float* _bias,
+      const float* _prelu_scale = nullptr,
+      const int _prelu_size = 0)
+      : GLFilter(
+            "GLScale",
+            vertex_shader,
+            fragment_shader,
+            input_bindings(_prelu_scale != nullptr),
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"FUSE_PRELU", c10::to_string(_prelu_scale != nullptr)}}),
        channels(_channels),
        scale(_scale),
        bias(_bias),
--- a/caffe2/mobile/contrib/opengl/operators/GLPRelu.cc
+++ b/caffe2/mobile/contrib/opengl/operators/GLPRelu.cc
@ -39,14 +39,13 @@ class GLPRelu : public GLFilter {
            std::vector<binding*>({BINDING(inputData)}),
            std::vector<binding*>({BINDING(scale_block)}),
            {/* no attributes */},
-            {{"USE_RELU", caffe2::to_string(PRelu)},
-             {"OUTPUT_TILES",
-              caffe2::to_string(_output_tile_x * _output_tile_y)},
-             {"OUTPUT_TILE_X", caffe2::to_string(_output_tile_x)},
-             {"OUTPUT_TILE_WIDTH", caffe2::to_string(_output_tile_width)},
-             {"OUTPUT_TILE_HEIGHT", caffe2::to_string(_output_tile_height)},
+            {{"USE_RELU", c10::to_string(PRelu)},
+             {"OUTPUT_TILES", c10::to_string(_output_tile_x * _output_tile_y)},
+             {"OUTPUT_TILE_X", c10::to_string(_output_tile_x)},
+             {"OUTPUT_TILE_WIDTH", c10::to_string(_output_tile_width)},
+             {"OUTPUT_TILE_HEIGHT", c10::to_string(_output_tile_height)},
             {"TILED_PRELU",
-              caffe2::to_string(_output_tile_x > 1 || _output_tile_y > 1)}}),
+              c10::to_string(_output_tile_x > 1 || _output_tile_y > 1)}}),
        scale(_scale),
        scale_size(_scale_size),
        channels(_channels),
@ -56,18 +55,19 @@ class GLPRelu : public GLFilter {
        output_tile_height(_output_tile_height) {}

  GLPRelu(const int _channels)
-      : GLFilter("GLRelu",
-                 vertex_shader,
-                 fragment_shader,
-                 std::vector<binding*>({BINDING(inputData)}),
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"USE_RELU", caffe2::to_string(Relu)},
-                  {"OUTPUT_TILES", caffe2::to_string(1)},
-                  {"OUTPUT_TILE_X", caffe2::to_string(1)},
-                  {"OUTPUT_TILE_WIDTH", caffe2::to_string(1)},
-                  {"OUTPUT_TILE_HEIGHT", caffe2::to_string(1)},
-                  {"TILED_PRELU", caffe2::to_string(0)}}),
+      : GLFilter(
+            "GLRelu",
+            vertex_shader,
+            fragment_shader,
+            std::vector<binding*>({BINDING(inputData)}),
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"USE_RELU", c10::to_string(Relu)},
+             {"OUTPUT_TILES", c10::to_string(1)},
+             {"OUTPUT_TILE_X", c10::to_string(1)},
+             {"OUTPUT_TILE_WIDTH", c10::to_string(1)},
+             {"OUTPUT_TILE_HEIGHT", c10::to_string(1)},
+             {"TILED_PRELU", c10::to_string(0)}}),
        scale(nullptr),
        scale_block(nullptr),
        scale_size(0),
--- a/caffe2/mobile/contrib/opengl/operators/GLPool.cc
+++ b/caffe2/mobile/contrib/opengl/operators/GLPool.cc
@ -42,23 +42,21 @@ class GLPool : public GLFilter {
            },
            {/* no uniform blocks */},
            {/* no attributes */},
-            {{"KERNEL_SIZE_X", caffe2::to_string(_geometry.kernel_size.x)},
-             {"KERNEL_SIZE_Y", caffe2::to_string(_geometry.kernel_size.y)},
-             {"INPUT_PADDING_X", caffe2::to_string(_geometry.input_padding.x)},
-             {"INPUT_PADDING_Y", caffe2::to_string(_geometry.input_padding.y)},
-             {"INPUT_STRIDE_X", caffe2::to_string(_geometry.input_stride.x)},
-             {"INPUT_STRIDE_Y", caffe2::to_string(_geometry.input_stride.y)},
-             {"INPUT_TILE_WIDTH",
-              caffe2::to_string(_geometry.input_tile_size.x)},
-             {"INPUT_TILE_HEIGHT",
-              caffe2::to_string(_geometry.input_tile_size.y)},
+            {{"KERNEL_SIZE_X", c10::to_string(_geometry.kernel_size.x)},
+             {"KERNEL_SIZE_Y", c10::to_string(_geometry.kernel_size.y)},
+             {"INPUT_PADDING_X", c10::to_string(_geometry.input_padding.x)},
+             {"INPUT_PADDING_Y", c10::to_string(_geometry.input_padding.y)},
+             {"INPUT_STRIDE_X", c10::to_string(_geometry.input_stride.x)},
+             {"INPUT_STRIDE_Y", c10::to_string(_geometry.input_stride.y)},
+             {"INPUT_TILE_WIDTH", c10::to_string(_geometry.input_tile_size.x)},
+             {"INPUT_TILE_HEIGHT", c10::to_string(_geometry.input_tile_size.y)},
             {"OUTPUT_TILE_WIDTH",
-              caffe2::to_string(_geometry.output_tile_size.x)},
+              c10::to_string(_geometry.output_tile_size.x)},
             {"OUTPUT_TILE_HEIGHT",
-              caffe2::to_string(_geometry.output_tile_size.y)},
-             {"TILED_POOLING", caffe2::to_string(_tiling)},
-             {"MAX_POOL", caffe2::to_string(poolType == MaxPool)},
-             {"BOUNDS_CHECK_MODE", caffe2::to_string(1)}}),
+              c10::to_string(_geometry.output_tile_size.y)},
+             {"TILED_POOLING", c10::to_string(_tiling)},
+             {"MAX_POOL", c10::to_string(poolType == MaxPool)},
+             {"BOUNDS_CHECK_MODE", c10::to_string(1)}}),
        geometry(_geometry) {}
  ~GLPool() {}

--- a/caffe2/mobile/contrib/opengl/operators/GLSigmoid.cc
+++ b/caffe2/mobile/contrib/opengl/operators/GLSigmoid.cc
@ -16,14 +16,15 @@ class GLSigmoid : public GLFilter {
  binding* outputSize;

  GLSigmoid(OpType opType)
-      : GLFilter("GLSigmoid",
-                 vertex_shader,
-                 fragment_shader,
-                 {BINDING(outputSize), BINDING(inputData)},
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"SIGMOID", caffe2::to_string(opType == Sigmoid)},
-                  {"TANH", caffe2::to_string(opType == Tanh)}}) {}
+      : GLFilter(
+            "GLSigmoid",
+            vertex_shader,
+            fragment_shader,
+            {BINDING(outputSize), BINDING(inputData)},
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"SIGMOID", c10::to_string(opType == Sigmoid)},
+             {"TANH", c10::to_string(opType == Tanh)}}) {}

  template <typename T>
  void sigmoid(const GLImageVector<T>& input_images, const GLImageVector<T>& output_images);
--- a/caffe2/mobile/contrib/opengl/operators/GLSoftmax.cc
+++ b/caffe2/mobile/contrib/opengl/operators/GLSoftmax.cc
@ -42,9 +42,9 @@ class GLSoftmaxReduce : public GLFilter {
            input_bindings(),
            {/* no uniform_blocks_bindings */},
            {/* no attributes */},
-            {{"COMPUTE_SUM", caffe2::to_string((int)compute_sum_)},
-             {"INPUT_TILE_X", caffe2::to_string(input_tile_x)},
-             {"TILED_SOFTMAX", caffe2::to_string(int(tiled))}}) {}
+            {{"COMPUTE_SUM", c10::to_string((int)compute_sum_)},
+             {"INPUT_TILE_X", c10::to_string(input_tile_x)},
+             {"TILED_SOFTMAX", c10::to_string(int(tiled))}}) {}

  template <typename T>
  void reduce(const GLImage<T>* input_image,
@ -190,8 +190,8 @@ class GLSoftmaxScale : public GLFilter {
            input_bindings(),
            {/* no uniform blocks */},
            {/* no attributes */},
-            {{"COMPUTE_EXP", caffe2::to_string((int)_compute_exp)},
-             {"TILED_SOFTMAX", caffe2::to_string((int)tiled)}}) {}
+            {{"COMPUTE_EXP", c10::to_string((int)_compute_exp)},
+             {"TILED_SOFTMAX", c10::to_string((int)tiled)}}) {}

  template <typename T>
  void scale(const GLImage<T>* input_image,
--- a/caffe2/mobile/contrib/opengl/operators/GLStylizer.cc
+++ b/caffe2/mobile/contrib/opengl/operators/GLStylizer.cc
@ -19,13 +19,18 @@ class GLStylizer : public GLFilter {

 public:
  GLStylizer(bool _deprocess = false, InputFormat input_format = BGRA)
-      : GLFilter(_deprocess ? "GLDeStylizer" : "GLStylizer",
-                 vertex_shader,
-                 fragment_shader,
-                 std::vector<binding*>({BINDING(inputData), BINDING(mean), BINDING(noise_std), BINDING(outputSize)}),
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"DEPROCESS", caffe2::to_string(_deprocess)}, {"RGBAINPUT", caffe2::to_string(input_format)}}),
+      : GLFilter(
+            _deprocess ? "GLDeStylizer" : "GLStylizer",
+            vertex_shader,
+            fragment_shader,
+            std::vector<binding*>({BINDING(inputData),
+                                   BINDING(mean),
+                                   BINDING(noise_std),
+                                   BINDING(outputSize)}),
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"DEPROCESS", c10::to_string(_deprocess)},
+             {"RGBAINPUT", c10::to_string(input_format)}}),
        deprocess(_deprocess) {}

  template <typename T1, typename T2>
--- a/caffe2/mobile/contrib/opengl/test/opengl_test.cc
+++ b/caffe2/mobile/contrib/opengl/test/opengl_test.cc
@ -814,8 +814,8 @@ void testOpenGLConcat(int N, std::vector<int> Cs, int H, int W, bool tiling = fa
            << "H: " << H << ", W: " << W;
  Workspace ws;
  for (int i = 0; i < Cs.size(); i++) {
-    auto* t = BlobGetMutableTensor(
-        ws.CreateBlob("X_cpu" + caffe2::to_string(i)), CPU);
+    auto* t =
+        BlobGetMutableTensor(ws.CreateBlob("X_cpu" + c10::to_string(i)), CPU);
    t->Resize(N, Cs[i], H, W);
    CPUContext ctx0;
    // Too noisy.
@ -826,8 +826,8 @@ void testOpenGLConcat(int N, std::vector<int> Cs, int H, int W, bool tiling = fa
  for (int i = 0; i < Cs.size(); i++) {
    auto& op = *(netdef.add_op());
    op.set_type("CopyToOpenGL");
-    op.add_input("X_cpu" + caffe2::to_string(i));
-    op.add_output("X_gl" + caffe2::to_string(i));
+    op.add_input("X_cpu" + c10::to_string(i));
+    op.add_output("X_gl" + c10::to_string(i));
    if (tiling) {
      int tile_x = 1, tile_y = 1;
      computeOutputTiles(Cs[i], tile_x, tile_y);
@ -849,7 +849,7 @@ void testOpenGLConcat(int N, std::vector<int> Cs, int H, int W, bool tiling = fa
    auto& op = *(netdef.add_op());
    op.set_type("OpenGLConcat");
    for (int i = 0; i < Cs.size(); i++) {
-      op.add_input("X_gl" + caffe2::to_string(i));
+      op.add_input("X_gl" + c10::to_string(i));
    }
    {
      auto& arg = *(op.add_arg());
@ -871,7 +871,7 @@ void testOpenGLConcat(int N, std::vector<int> Cs, int H, int W, bool tiling = fa
    auto& op = *(netdef.add_op());
    op.set_type("Concat");
    for (int i = 0; i < Cs.size(); i++) {
-      op.add_input("X_cpu" + caffe2::to_string(i));
+      op.add_input("X_cpu" + c10::to_string(i));
    }
    auto& arg = *(op.add_arg());
    arg.set_name("order");
--- a/caffe2/observers/runcnt_observer.cc
+++ b/caffe2/observers/runcnt_observer.cc
@ -13,9 +13,9 @@ std::string RunCountNetObserver::debugInfo() {
 #if CAFFE2_ANDROID
  // workaround
  int foo = cnt_;
-  return "This operator runs " + caffe2::to_string(foo) + " times.";
+  return "This operator runs " + c10::to_string(foo) + " times.";
 #else
-  return "This operator runs " + caffe2::to_string(cnt_) + " times.";
+  return "This operator runs " + c10::to_string(cnt_) + " times.";
 #endif
 }

--- a/caffe2/onnx/backend.cc
+++ b/caffe2/onnx/backend.cc
@ -557,7 +557,7 @@ Caffe2Ops Caffe2Backend::CreatePadPool(
      bool pads_flag = false;
      str += "[";
      for (const auto& i : pads) {
-        str += caffe2::to_string(i) + ",";
+        str += c10::to_string(i) + ",";
        pads_flag = pads_flag || i > 0;
      }
      str += "]";
--- a/caffe2/operators/CMakeLists.txt
+++ b/caffe2/operators/CMakeLists.txt
@ -40,7 +40,11 @@ file(GLOB tmp *.cc)
 file(GLOB tmp_cudnn *_cudnn.cc)
 exclude(tmp "${tmp}" ${tmp_cudnn})
 set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
-file(GLOB_RECURSE tmp experimental/c10/*.cc)
-set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
+
+if (BUILD_C10_EXPERIMENTAL_OPS)
+    # Keep the append guarded: when OFF, tmp still holds the sources added above.
+    file(GLOB_RECURSE tmp experimental/c10/*.cc)
+    set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
+endif()
 # exclude test files and gpu files
 file(GLOB tmp *_test.cc)
--- a/caffe2/operators/bbox_transform_op.h
+++ b/caffe2/operators/bbox_transform_op.h
@ -35,7 +35,7 @@ class BBoxTransformOp final : public Operator<Context> {
    CAFFE_ENFORCE_EQ(
        weights_.size(),
        4,
-        "weights size " + caffe2::to_string(weights_.size()) + "must be 4.");
+        "weights size " + c10::to_string(weights_.size()) + "must be 4.");
  }
  USE_OPERATOR_CONTEXT_FUNCTIONS;

--- a/caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.h
+++ b/caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.h
@ -65,15 +65,15 @@ class CollectAndDistributeFpnRpnProposalsOp final : public Operator<Context> {
    CAFFE_ENFORCE_GE(
        roi_max_level_,
        roi_min_level_,
-        "roi_max_level " + caffe2::to_string(roi_max_level_) +
+        "roi_max_level " + c10::to_string(roi_max_level_) +
            " must be greater than or equal to roi_min_level " +
-            caffe2::to_string(roi_min_level_) + ".");
+            c10::to_string(roi_min_level_) + ".");
    CAFFE_ENFORCE_GE(
        rpn_max_level_,
        rpn_min_level_,
-        "rpn_max_level " + caffe2::to_string(rpn_max_level_) +
+        "rpn_max_level " + c10::to_string(rpn_max_level_) +
            " must be greater than or equal to rpn_min_level " +
-            caffe2::to_string(rpn_min_level_) + ".");
+            c10::to_string(rpn_min_level_) + ".");
  }

  ~CollectAndDistributeFpnRpnProposalsOp() {}
--- a/caffe2/operators/do_op.h
+++ b/caffe2/operators/do_op.h
@ -55,9 +55,9 @@ class DoOp final : public Operator<Context> {
          outer_blobs_idx[blob_idx] >= 0 &&
              outer_blobs_idx[blob_idx] < outer_blob_names.size(),
          "Invalid blob bindings: outer blob index (" +
-              caffe2::to_string(outer_blobs_idx[blob_idx]) + ", inner name: " +
+              c10::to_string(outer_blobs_idx[blob_idx]) + ", inner name: " +
              inner_blobs[blob_idx] + ") is out of bounds [0, " +
-              caffe2::to_string(outer_blob_names.size() - 1) + "]");
+              c10::to_string(outer_blob_names.size() - 1) + "]");
      const auto& outer_name = outer_blob_names[outer_blobs_idx[blob_idx]];
      CAFFE_ENFORCE(
          !used_outer_names.count(outer_name),
--- a/caffe2/operators/h_softmax_op.cc
+++ b/caffe2/operators/h_softmax_op.cc
@ -324,7 +324,7 @@ bool HSoftmaxSearchOp<float, CPUContext>::extractNodes(
    info.emplace_back(std::make_pair(n.name(), node.scores(i++)));
  }
  for (const int n : node.word_ids()) {
-    info.emplace_back(std::make_pair(caffe2::to_string(n), node.scores(i++)));
+    info.emplace_back(std::make_pair(c10::to_string(n), node.scores(i++)));
  }

  for (const auto& n : node.children()) {
--- a/caffe2/operators/onnx_while_op.h
+++ b/caffe2/operators/onnx_while_op.h
@ -34,7 +34,7 @@ class ONNXWhileOp final : public Operator<Context> {
        body_net_def_.set_name("loop_net");
      } else {
        ++counter;
-        body_net_def_.set_name("loop_net." + caffe2::to_string(counter));
+        body_net_def_.set_name("loop_net." + c10::to_string(counter));
      }
    }
  }
--- a/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h
+++ b/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h
@ -40,7 +40,7 @@ class RecurrentNetworkBlobFetcherOp final : public Operator<Context> {
        const auto& currentTensor = currentBlob->Get<Tensor>();

        std::string newBlobName =
-            prefix_ + std::string("_") + blob_name + caffe2::to_string(i);
+            prefix_ + std::string("_") + blob_name + c10::to_string(i);
        blob_names_vector.push_back(newBlobName);

        BlobGetMutableTensor(ws_->CreateBlob(newBlobName), CPU)
--- a/caffe2/operators/rnn/recurrent_network_executor.h
+++ b/caffe2/operators/rnn/recurrent_network_executor.h
@ -110,7 +110,7 @@ class RecurrentNetworkExecutorBase {
      // avoid conflicting timestep blobs when reusing workspaces, as with
      // the forward-only mode.
      std::string this_timestep_blob =
-          timestep_blob_ + "_rnnexec_t" + caffe2::to_string(t);
+          timestep_blob_ + "_rnnexec_t" + c10::to_string(t);
      BlobGetMutableTensor(ws->CreateBlob(this_timestep_blob), CPU)->Resize(1);
      auto b = ws->GetBlob(this_timestep_blob);
      CAFFE_ENFORCE(b);
--- a/caffe2/operators/segment_reduction_op.cc
+++ b/caffe2/operators/segment_reduction_op.cc
@ -10,8 +10,7 @@ OpSchema::Cost CostInferenceForSparseLengths(
  CAFFE_ENFORCE_GE(
      inputs.size(),
      min_num_of_inputs,
-      def.type() + " requires at least " +
-          caffe2::to_string(min_num_of_inputs));
+      def.type() + " requires at least " + c10::to_string(min_num_of_inputs));

  const TensorShape data = inputs[0];
  const TensorShape indices = inputs[1 + use_weight];
--- a/caffe2/opt/backend_cutting.cc
+++ b/caffe2/opt/backend_cutting.cc
@ -44,8 +44,8 @@ void DumpGraph(NNGraph* g) {
    assert(node->data() && "Node doesn't have data, can't render it");
    if (isa<NeuralNetOperator>(node->data())) {
      auto* op = dyn_cast<NeuralNetOperator>(node->data().get());
-      labelMap["label"] = op->getName() + " (" +
-          caffe2::to_string((unsigned long long)node) + ")";
+      labelMap["label"] =
+          op->getName() + " (" + c10::to_string((unsigned long long)node) + ")";
      auto* annotation = op->getAnnotation();
      if (annotation && isa<Caffe2Annotation>(annotation)) {
        auto device_annotation = dyn_cast<Caffe2Annotation>(annotation);
@ -60,8 +60,8 @@ void DumpGraph(NNGraph* g) {
    } else if (isa<Data>(node->data())) {
      auto tensor = dyn_cast<NeuralNetData>(node->data().get());
      labelMap["label"] = tensor->getName();
-      labelMap["label"] += "_" + caffe2::to_string(tensor->getVersion()) + " " +
-          caffe2::to_string((unsigned long long)node);
+      labelMap["label"] += "_" + c10::to_string(tensor->getVersion()) + " " +
+          c10::to_string((unsigned long long)node);
    }
    return labelMap;
  };
--- a/caffe2/opt/backend_cutting_test.cc
+++ b/caffe2/opt/backend_cutting_test.cc
@ -11,10 +11,10 @@ namespace {
  void AddConv(caffe2::NetDef* net, int tick) {
    auto* op = net->add_op();
    op->set_type("MyConv");
-    op->add_input("N" + caffe2::to_string(tick));
-    op->add_input("W" + caffe2::to_string(tick));
-    op->add_input("b" + caffe2::to_string(tick));
-    op->add_output("N" + caffe2::to_string(tick+1));
+    op->add_input("N" + c10::to_string(tick));
+    op->add_input("W" + c10::to_string(tick));
+    op->add_input("b" + c10::to_string(tick));
+    op->add_output("N" + c10::to_string(tick + 1));
  }

  bool Supports(const caffe2::OperatorDef& op) {
--- a/caffe2/opt/converter_nomigraph_test.cc
+++ b/caffe2/opt/converter_nomigraph_test.cc
@ -16,7 +16,7 @@ TEST(Converter, Basic) {
      caffe2::OperatorDef *def = net.add_op();
      def->set_type("Conv");
      def->add_input("X");
-      def->add_input("W" + caffe2::to_string(i)); // different weights
+      def->add_input("W" + c10::to_string(i)); // different weights
      ADD_ARG(def, "kernel", i, 3);
      ADD_ARG(def, "stride", i, 1);
      ADD_ARG(def, "pad", i, 0);
@ -42,8 +42,8 @@ TEST(Converter, UnknownType) {
  def->set_type("NeverSeen");
  def->add_input("X");
  def->add_output("X");
-  def->mutable_device_option()->set_node_name("device_" +
-      caffe2::to_string(rand() % 2));
+  def->mutable_device_option()->set_node_name(
+      "device_" + c10::to_string(rand() % 2));
  auto nn = caffe2::convertToNNModule(net);
  auto new_netdef = caffe2::convertToCaffe2Proto(nn);
 }
--- a/caffe2/opt/device_test.cc
+++ b/caffe2/opt/device_test.cc
@ -20,8 +20,8 @@ TEST(DeviceTest, InsertCopies) {
      caffe2::OperatorDef* def = net.add_op();
      def->set_type("Conv");
      def->add_input("X");
-      def->add_input("W" + caffe2::to_string(i));
-      def->add_input("b" + caffe2::to_string(i));
+      def->add_input("W" + c10::to_string(i));
+      def->add_input("b" + c10::to_string(i));
      ADD_ARG(def, "kernel", i, 3);
      ADD_ARG(def, "stride", i, 1);
      ADD_ARG(def, "pad", i, 0);
--- a/caffe2/opt/mobile_test.cc
+++ b/caffe2/opt/mobile_test.cc
@ -18,8 +18,8 @@ TEST(MobileTest, Convolution) {
      caffe2::OperatorDef* def = net.add_op();
      def->set_type("Conv");
      def->add_input("X");
-      def->add_input("W" + caffe2::to_string(i));
-      def->add_input("b" + caffe2::to_string(i));
+      def->add_input("W" + c10::to_string(i));
+      def->add_input("b" + c10::to_string(i));
      ADD_ARG(def, "kernel", i, 3);
      ADD_ARG(def, "stride", i, 1);
      ADD_ARG(def, "pad", i, 0);
--- a/caffe2/predictor/emulator/data_filler.cc
+++ b/caffe2/predictor/emulator/data_filler.cc
@ -67,14 +67,14 @@ DataRandomFiller::DataRandomFiller(
    const auto& op_types = input_types[i];
    CAFFE_ENFORCE(
        op_dims.size() == op.input_size(),
-        op.name() + " has " + caffe2::to_string(op.input_size()) +
+        op.name() + " has " + c10::to_string(op.input_size()) +
            " inputs; while the input dimension size is " +
-            caffe2::to_string(op_dims.size()));
+            c10::to_string(op_dims.size()));
    CAFFE_ENFORCE(
        op_types.size() == op.input_size(),
-        op.name() + " has " + caffe2::to_string(op.input_size()) +
+        op.name() + " has " + c10::to_string(op.input_size()) +
            " inputs; while the input type size is " +
-            caffe2::to_string(op_types.size()));
+            c10::to_string(op_types.size()));

    for (size_t j = 0; j < op.input_size(); ++j) {
      inputs_[op.input(j)] =
--- a/caffe2/predictor/emulator/std_output_formatter.h
+++ b/caffe2/predictor/emulator/std_output_formatter.h
@ -33,10 +33,10 @@ class StdOutputFormatter : public OutputFormatter {
    auto mean = get_mean(durations_ms);
    auto throughput = iterations / (mean / MS_IN_SECOND);
    return std::string("\n\n====================================\n") +
-        "Predictor benchmark finished with " + caffe2::to_string(threads) +
-        " threads.\nThroughput:\t\t" + caffe2::to_string(throughput) +
+        "Predictor benchmark finished with " + c10::to_string(threads) +
+        " threads.\nThroughput:\t\t" + c10::to_string(throughput) +
        " iterations/s\nVariation:\t\t" +
-        caffe2::to_string(get_stdev(durations_ms) * 100 / mean) +
+        c10::to_string(get_stdev(durations_ms) * 100 / mean) +
        "%\n====================================";
  }
 };
--- a/caffe2/queue/queue_ops.h
+++ b/caffe2/queue/queue_ops.h
@ -105,8 +105,8 @@ class SafeEnqueueBlobsOp final : public Operator<Context> {
    auto size = queue->getNumBlobs();
    CAFFE_ENFORCE(
        OutputSize() == size + 1,
-        "Expected " + caffe2::to_string(size + 1) + ", " +
-            " got: " + caffe2::to_string(size));
+        "Expected " + c10::to_string(size + 1) + ", " +
+            " got: " + c10::to_string(size));
    bool status = queue->blockingWrite(this->Outputs());
    Output(size)->Resize();
    math::Set<bool, Context>(
--- a/caffe2/serialize/inline_container.h
+++ b/caffe2/serialize/inline_container.h
@ -112,7 +112,7 @@ class PyTorchStreamReader final {
        file_size_ % kFieldAlignment == 0,
        "File length is not a multiple of the alignment"
        " size. Is this a valid PyTorch model file? File size: ",
-        caffe2::to_string(file_size_));
+        c10::to_string(file_size_));
    readAndValidateFileHeader();
  }

@ -209,9 +209,9 @@ class PyTorchStreamReader final {
    AT_ASSERTM(
        file_format_version >= kMinSupportedFileFormatVersion,
        "Attempted to read a PyTorch file with version ",
-        caffe2::to_string(file_format_version),
+        c10::to_string(file_format_version),
        ", but the minimum supported version for reading is ",
-        caffe2::to_string(kMinSupportedFileFormatVersion),
+        c10::to_string(kMinSupportedFileFormatVersion),
        ". Your PyTorch script module file is too old. Please re-export it again.");
    AT_ASSERTM(
        file_format_version <= kMaxSupportedFileFormatVersion,
--- a/caffe2/share/contrib/zstd/quant_decomp_zstd_op.cc
+++ b/caffe2/share/contrib/zstd/quant_decomp_zstd_op.cc
@ -29,7 +29,7 @@ uint8_t* GetMutableData(int type_index, TensorCPU* tensor) {
  CAFFE_ENFORCE_EQ(
      gTypeMapper.count(type_index),
      1,
-      "Invalid type index " + caffe2::to_string(type_index) + ".");
+      "Invalid type index " + c10::to_string(type_index) + ".");
  return gTypeMapper.at(type_index)(tensor);
 }

--- a/caffe2/transforms/pattern_net_transform.h
+++ b/caffe2/transforms/pattern_net_transform.h
@ -124,7 +124,7 @@ class CAFFE2_API PatternNetTransform : public Transform {
  bool argument_match_ = false;

  const string TransformBlobWrapper(const string& blob_name) {
-    return "transform/" + blob_name + "_" + caffe2::to_string(ssa_id_);
+    return "transform/" + blob_name + "_" + c10::to_string(ssa_id_);
  }

  int ssa_id_ = 0;
--- a/caffe2/utils/fatal_signal_asan_no_sig_test.cc
+++ b/caffe2/utils/fatal_signal_asan_no_sig_test.cc
@ -102,7 +102,7 @@ bool forkAndPipe(
    }));                                                                     \
    int keyPhraseCount = 0;                                                  \
    std::string keyPhrase =                                                  \
-        std::string(name) + "(" + caffe2::to_string(signum) + "), Thread";   \
+        std::string(name) + "(" + c10::to_string(signum) + "), Thread";      \
    size_t loc = 0;                                                          \
    while ((loc = stderrBuffer.find(keyPhrase, loc)) != std::string::npos) { \
      keyPhraseCount += 1;                                                   \
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@ -421,6 +421,9 @@ if(USE_OPENCV)
  if(OpenCV_FOUND)
    include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
    list(APPEND Caffe2_DEPENDENCY_LIBS ${OpenCV_LIBS})
+    if (MSVC AND USE_CUDA)
+        list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${OpenCV_LIBS})
+    endif()
    message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})")
  else()
    message(WARNING "Not compiling with OpenCV. Suppress this warning with -DUSE_OPENCV=OFF")
@ -933,12 +936,6 @@ if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
    set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${CAFFE2_CUSTOM_PROTOC_EXECUTABLE})
  endif()
  set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
-  # We will build onnx as static libs and embed it directly into the binary.
-  if (MSVC AND BUILD_SHARED_LIBS)
-    # That also means we want to export all symbols from the shared
-    # library we are building
-    set(ONNX_BUILD_MAIN_LIB ON)
-  endif()
  set(BUILD_SHARED_LIBS OFF)
  set(ONNX_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME})
  set(ONNX_USE_LITE_PROTO ${CAFFE2_USE_LITE_PROTO})
--- a/modules/CMakeLists.txt
+++ b/modules/CMakeLists.txt
@ -1,9 +1,4 @@
-# ---[ Add modules
-# TODO(orionr): Enable Detectron ops for Windows DLL when we
-# can figure out how to get it to build
-if (NOT (MSVC AND BUILD_SHARED_LIBS))
-  add_subdirectory(detectron)
-endif()
+add_subdirectory(detectron)
 add_subdirectory(module_test)
 add_subdirectory(observers)
 add_subdirectory(rocksdb)
--- a/modules/observers/net_observer_reporter_print.cc
+++ b/modules/observers/net_observer_reporter_print.cc
@ -19,24 +19,22 @@ void NetObserverReporterPrint::report(
  for (auto& p : info) {
    if ((p.first == "NET_DELAY") && (info.size() == 1)) {
      // for Net_delay perf
-      caffe2_perf.push_back(
-          {{"type", "NET"},
-           {"value", caffe2::to_string(p.second.latency * 1000)},
-           {"unit", "us"},
-           {"metric", "latency"}});
+      caffe2_perf.push_back({{"type", "NET"},
+                             {"value", c10::to_string(p.second.latency * 1000)},
+                             {"unit", "us"},
+                             {"metric", "latency"}});
    } else if (p.first != "NET_DELAY") {
      // for operator perf
      std::string shape_str = get_tensor_shapes(p.second);
      std::string args_str = get_op_args(p.second);

-      caffe2_perf.push_back(
-          {{"type", p.first},
-           {"value", caffe2::to_string(p.second.latency * 1000)},
-           {"unit", "us"},
-           {"metric", "latency"}});
+      caffe2_perf.push_back({{"type", p.first},
+                             {"value", c10::to_string(p.second.latency * 1000)},
+                             {"unit", "us"},
+                             {"metric", "latency"}});
      if (p.second.flops > 0) {
        caffe2_perf.push_back({{"type", p.first},
-                               {"value", caffe2::to_string(p.second.flops)},
+                               {"value", c10::to_string(p.second.flops)},
                               {"unit", "flop"},
                               {"metric", "flops"}});
      }
--- a/modules/observers/perf_observer.cc
+++ b/modules/observers/perf_observer.cc
@ -167,7 +167,7 @@ caffe2::string PerfNetObserver::getObserverName(const OperatorBase* op, int idx)
                                                : "NO_OUTPUT")
                           : "NO_DEF");
  caffe2::string name =
-      "ID_" + caffe2::to_string(idx) + "_" + opType + "_" + displayName;
+      "ID_" + c10::to_string(idx) + "_" + opType + "_" + displayName;
  return name;
 }

--- a/torch/csrc/jit/export.cpp
+++ b/torch/csrc/jit/export.cpp
@ -984,10 +984,10 @@ class ScriptModuleSerializer final {
      } else {
        record_id = writer_.writeRecord(tensor.storage().data(), record_size);
      }
-      external_data->set_record_id(caffe2::to_string(record_id));
+      external_data->set_record_id(c10::to_string(record_id));
      storageMap_[key] = record_id;
    } else {
-      external_data->set_record_id(caffe2::to_string(it->second));
+      external_data->set_record_id(c10::to_string(it->second));
    }
    // TODO handle device case, set the device_detail and load to CUDA device
  }
--- a/torch/csrc/jit/import.cpp
+++ b/torch/csrc/jit/import.cpp
@ -473,7 +473,7 @@ class ScriptModuleDeserializer final {
    }
    auto type = at::typeMetaToScalarType(
        caffe2::DataTypeToTypeMeta(tensor_proto.data_type()));
-    uint64_t record_id = caffe2::stoull(external_data.record_id());
+    uint64_t record_id = c10::stoull(external_data.record_id());
    AT_ASSERT(record_id != 0);
    auto storage_it = storageMap_.find(record_id);
    if (storage_it == storageMap_.end()) {