Remove BUILD_CAFFE2 and build everything (#8338)

Summary:
This completely removes BUILD_CAFFE2 from CMake. There is still a little bit of "full build" stuff in setup.py that enables USE_CUDNN and BUILD_PYTHON, but otherwise everything should be enabled for PyTorch as well as Caffe2. This gets us a lot closer to full unification.

cc mingzhe09088, pjh5, ezyang, smessmer, Yangqing
Pull Request resolved: https://github.com/pytorch/pytorch/pull/8338

Reviewed By: mingzhe09088

Differential Revision: D9600513

Pulled By: orionr

fbshipit-source-id: 9f6ca49df35b920d3439dcec56e7b26ad4768b7d
Orion Reblitz-Richardson authored on 2018-08-31 13:08:20 -07:00, committed by Facebook Github Bot
parent a2a584f347
commit 6508db7421
35 changed files with 519 additions and 502 deletions

View File

@ -218,13 +218,21 @@ if [[ -z "$INTEGRATED" ]]; then
else
# sccache will be stuck if all cores are used for compiling
# see https://github.com/pytorch/pytorch/pull/7361
if [[ -n "${SCCACHE}" ]]; then
export MAX_JOBS=`expr $(nproc) - 1`
fi
FULL_CAFFE2=1 python setup.py install --user
# TODO: I'm not sure why this is necessary
# This is to save test binaries for testing
cp -r torch/lib/tmp_install $INSTALL_PREFIX
fi
ls $INSTALL_PREFIX
report_compile_cache_stats
report_compile_cache_stats
fi
###############################################################################

View File

@ -112,8 +112,7 @@ else
exit 1
fi
if [[ "$BUILD_ENVIRONMENT" == *pytorch-linux-xenial-cuda9-cudnn7-py3 ]] || \
[[ "$BUILD_ENVIRONMENT" == *pytorch-linux-trusty-py3.6-gcc7* ]]; then
if [[ "$BUILD_ENVIRONMENT" == *pytorch-linux-trusty-py3.6-gcc7* ]]; then
BUILD_TEST_LIBTORCH=1
else
BUILD_TEST_LIBTORCH=0

View File

@ -54,7 +54,6 @@ endif()
# cmake/Summary.cmake so that the summary prints out the option values.
include(CMakeDependentOption)
option(BUILD_TORCH "Build Torch" OFF)
option(BUILD_CAFFE2 "Build Caffe2" ON)
option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
option(BUILD_ATEN_MOBILE "Build ATen for Android and iOS" OFF)
option(BUILD_BINARY "Build C++ binaries" ON)
@ -68,9 +67,7 @@ cmake_dependent_option(
cmake_dependent_option(
CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON
"NOT BUILD_SHARED_LIBS" OFF)
cmake_dependent_option(
BUILD_TEST "Build Caffe2 C++ test binaries (need gtest and gbenchmark)" OFF
"BUILD_CAFFE2" OFF)
option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF)
cmake_dependent_option(
INSTALL_TEST "Install test binaries if BUILD_TEST is on" OFF
"BUILD_TEST" OFF)
@ -83,32 +80,16 @@ cmake_dependent_option(
USE_CUDNN "Use cuDNN" ON
"USE_CUDA" OFF)
option(USE_FFMPEG "Use ffmpeg" OFF)
cmake_dependent_option(
USE_GFLAGS "Use GFLAGS" ON
"BUILD_CAFFE2" OFF)
cmake_dependent_option(
USE_GLOG "Use GLOG" ON
"BUILD_CAFFE2" OFF)
cmake_dependent_option(
USE_GLOO "Use Gloo" ON
"BUILD_CAFFE2" OFF)
option(USE_GFLAGS "Use GFLAGS" ON)
option(USE_GLOG "Use GLOG" ON)
option(USE_GLOO "Use Gloo" ON)
option(USE_GLOO_IBVERBS "Use Gloo IB verbs for distributed support" OFF)
cmake_dependent_option(
USE_LEVELDB "Use LEVELDB" ON
"BUILD_CAFFE2" OFF)
option(USE_LEVELDB "Use LEVELDB" ON)
option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
cmake_dependent_option(
USE_LMDB "Use LMDB" ON
"BUILD_CAFFE2" OFF)
cmake_dependent_option(
USE_METAL "Use Metal for iOS build" ON
"BUILD_CAFFE2" OFF)
cmake_dependent_option(
USE_MOBILE_OPENGL "Use OpenGL for mobile code" ON
"BUILD_CAFFE2" OFF)
cmake_dependent_option(
USE_MPI "Use MPI" ON
"BUILD_CAFFE2" OFF)
option(USE_LMDB "Use LMDB" ON)
option(USE_METAL "Use Metal for iOS build" ON)
option(USE_MOBILE_OPENGL "Use OpenGL for mobile code" ON)
option(USE_MPI "Use MPI" ON)
option(USE_NATIVE_ARCH "Use -march=native" OFF)
option(USE_NCCL "Use NCCL" ON)
option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF)
@ -121,9 +102,7 @@ cmake_dependent_option(
"USE_CUDA" OFF)
option(USE_OBSERVERS "Use observers module." OFF)
option(USE_OPENCL "Use OpenCL" OFF)
cmake_dependent_option(
USE_OPENCV "Use OpenCV" ON
"BUILD_CAFFE2" OFF)
option(USE_OPENCV "Use OpenCV" ON)
option(USE_OPENMP "Use OpenMP for parallel code" OFF)
option(USE_PROF "Use profiling" OFF)
option(USE_REDIS "Use Redis" OFF)
@ -133,12 +112,8 @@ option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
option(USE_ZMQ "Use ZMQ" OFF)
option(USE_ZSTD "Use ZSTD" OFF)
option(USE_MKLDNN "Use MKLDNN" OFF)
cmake_dependent_option(
USE_IDEEP "Use IDEEP interface in MKL BLAS" ON
"BUILD_CAFFE2" OFF)
cmake_dependent_option(
USE_MKLML "Use MKLML interface in MKL BLAS" ON
"BUILD_CAFFE2" OFF)
option(USE_IDEEP "Use IDEEP interface in MKL BLAS" ON)
option(USE_MKLML "Use MKLML interface in MKL BLAS" ON)
option(USE_DISTRIBUTED "Use THD (distributed)" OFF)
# Used when building Caffe2 through setup.py
@ -218,6 +193,9 @@ if(NOT MSVC)
if (CMAKE_COMPILER_IS_GNUCXX AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pedantic")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=redundant-decls")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=old-style-cast")
# These flags are not available in GCC-4.8.5. Set only when using clang.
# Compared against https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/Option-Summary.html
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
@ -240,6 +218,10 @@ if(NOT MSVC)
if ($ENV{WERROR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif($ENV{WERROR})
if (NOT APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized")
endif()
else()
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
@ -266,6 +248,17 @@ if (USE_ASAN)
set (CMAKE_LINKER_FLAGS_DEBUG "${CMAKE_STATIC_LINKER_FLAGS_DEBUG} -fsanitize=address")
endif()
if (APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++14-extensions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constexpr-not-const")
endif()
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
endif()
if(ANDROID)
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s")
@ -402,8 +395,10 @@ else()
endif()
# ---[ Modules
if (BUILD_CAFFE2)
add_subdirectory(modules)
# TODO(orionr): Enable all of this for Windows DLL when we
# can figure out how to get it to build
if (NOT (MSVC AND BUILD_SHARED_LIBS))
add_subdirectory(modules)
endif()
# ---[ Binaries
@ -411,10 +406,12 @@ endif()
# are built. For the binaries, they will be linked to the Caffe2 main
# libraries, as well as all the modules that are built with Caffe2 (the ones
# built in the previous Modules section above).
if (BUILD_CAFFE2)
if (BUILD_BINARY)
add_subdirectory(binaries)
endif()
# TODO(orionr): Enable all of this for Windows DLL when we
# can figure out how to get it to build
if (NOT (MSVC AND BUILD_SHARED_LIBS))
if (BUILD_BINARY)
add_subdirectory(binaries)
endif()
endif()
include(cmake/Summary.cmake)

View File

@ -404,7 +404,7 @@ inline bool operator!=(const TypeMeta& lhs, const TypeMeta& rhs) noexcept {
#ifdef _MSC_VER
#define CAFFE_KNOWN_TYPE(T) \
template <> \
AT_CORE_API TypeIdentifier TypeMeta::Id<T>() { \
AT_CORE_EXPORT TypeIdentifier TypeMeta::Id<T>() { \
static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \
static TypeNameRegisterer<T> registerer(type_id, #T); \
return type_id; \

View File

@ -5,7 +5,7 @@
#include "cuda_runtime_api.h"
#include <ATen/core/ATenGeneral.h>
#include <ATen/cuda/ATenCUDAGeneral.h>
/*
* A CUDAStream interface. See CUDAStream.cpp for implementation details.
@ -17,7 +17,7 @@
/*
* Stream pool note.
*
*
* A CUDAStream is an abstraction of an actual cuStream on the GPU. CUDAStreams
* are backed by cuStreams, but they use several pools to minimize the costs
* associated with creating, retaining, and destroying cuStreams.
@ -27,14 +27,14 @@
* The first pool contains only the default stream. When the default stream
* is requested it's returned.
*
* The second pool is the "low priority" or "default priority" streams. In
* The second pool is the "low priority" or "default priority" streams. In
* HIP builds there is no distinction between streams in this pool and streams
* in the third pool (below). There are 32 of these streams per device, and
* in the third pool (below). There are 32 of these streams per device, and
* when a stream is requested one of these streams is returned round-robin.
* That is, the first stream requested is at index 0, the second at index 1...
* to index 31, then index 0 again.
*
* This means that if 33 low priority streams are requested, the first and
* This means that if 33 low priority streams are requested, the first and
* last streams requested are actually the same stream (under the covers)
* and kernels enqueued on them cannot run concurrently.
*
@ -46,7 +46,7 @@
* many longer-lived streams are required in performance critical scenarios
* then the functionality here may need to be extended to allow, for example,
* "reserving" a subset of the pool so that other streams do not accidentally
* overlap the performance critical streams.
* overlap the performance critical streams.
*/
struct CUDAStreamInternals;
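
The stream pool note in this hunk describes round-robin allocation over a fixed pool of 32 streams per device. As a minimal sketch of that policy only (the names, kMaxDevices, and the Stream struct are assumptions for illustration, not the real ATen implementation), the selection reduces to a per-device counter taken modulo the pool size:

#include <atomic>
#include <cstdint>
#include <cstdio>

// Sketch of the round-robin policy from the stream pool note above.
// kStreamsPerPool matches the "32 streams per device" in the note;
// kMaxDevices and the Stream struct are illustrative assumptions.
constexpr int kStreamsPerPool = 32;
constexpr int kMaxDevices = 8;

struct Stream {
  int device;
  int index;  // position within the pool
};

std::atomic<uint32_t>& counterFor(int device) {
  // Zero-initialized at program start; one rotation counter per device.
  static std::atomic<uint32_t> counters[kMaxDevices];
  return counters[device];
}

Stream getStreamFromPool(int device) {
  // fetch_add keeps the rotation correct under concurrent requests.
  uint32_t raw = counterFor(device).fetch_add(1, std::memory_order_relaxed);
  return Stream{device, static_cast<int>(raw % kStreamsPerPool)};
}

int main() {
  // As the note warns: the 1st and 33rd requests map to the same stream,
  // so work queued on them cannot actually run concurrently.
  Stream first = getStreamFromPool(0);
  for (int i = 0; i < 31; ++i) {
    getStreamFromPool(0);
  }
  Stream thirtyThird = getStreamFromPool(0);
  std::printf("%d == %d\n", first.index, thirtyThird.index);  // prints "0 == 0"
  return 0;
}
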
@ -59,19 +59,19 @@ struct CUDAEvent;
namespace detail {
// Pointer-based API (for internal use)
AT_API CUDAStreamInternals* CUDAStream_getDefaultStream(int64_t device = -1);
AT_CUDA_API CUDAStreamInternals* CUDAStream_getDefaultStream(int64_t device = -1);
AT_API CUDAStreamInternals* CUDAStream_createStream(
AT_CUDA_API CUDAStreamInternals* CUDAStream_createStream(
const bool isHighPriority = false
, int64_t device = -1);
AT_API CUDAStreamInternals* CUDAStream_getCurrentStream(int64_t device = -1);
AT_CUDA_API CUDAStreamInternals* CUDAStream_getCurrentStream(int64_t device = -1);
AT_API void CUDAStream_setStream(CUDAStreamInternals* internals);
AT_API void CUDAStream_uncheckedSetStream(CUDAStreamInternals* internals);
AT_CUDA_API void CUDAStream_setStream(CUDAStreamInternals* internals);
AT_CUDA_API void CUDAStream_uncheckedSetStream(CUDAStreamInternals* internals);
AT_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
AT_API int64_t CUDAStream_device(CUDAStreamInternals*);
AT_CUDA_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
AT_CUDA_API int64_t CUDAStream_device(CUDAStreamInternals*);
} // namespace detail
@ -81,7 +81,7 @@ struct CUDAStream {
// Constructors
CUDAStream() = default;
/* implicit */ CUDAStream(CUDAStreamInternals* internals_in)
/* implicit */ CUDAStream(CUDAStreamInternals* internals_in)
: internals_{internals_in} { }
// Returns true if the CUDAStream is not null.

View File

@ -3,9 +3,6 @@ include(../cmake/Codegen.cmake)
# ---[ Declare source file lists
# ---[ Shared build
add_subdirectory(utils)
# ---[ ATen build
if (NOT BUILD_ATEN_MOBILE)
set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE})
@ -14,25 +11,23 @@ if (NOT BUILD_ATEN_MOBILE)
add_subdirectory(../aten aten)
set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE})
if(BUILD_CAFFE2)
# Generate the headers wrapped by our operator
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h
COMMAND
${PYCMD} ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py
--aten_root=${CMAKE_CURRENT_SOURCE_DIR}/../aten
--template_dir=${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten
--yaml_dir=${CMAKE_CURRENT_BINARY_DIR}/../aten/src/ATen
--install_dir=${CMAKE_CURRENT_BINARY_DIR}/contrib/aten
DEPENDS
ATEN_CPU_FILES_GEN_TARGET
${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py
${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/aten_op_template.h)
# Generate the headers wrapped by our operator
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h
COMMAND
${PYCMD} ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py
--aten_root=${CMAKE_CURRENT_SOURCE_DIR}/../aten
--template_dir=${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten
--yaml_dir=${CMAKE_CURRENT_BINARY_DIR}/../aten/src/ATen
--install_dir=${CMAKE_CURRENT_BINARY_DIR}/contrib/aten
DEPENDS
ATEN_CPU_FILES_GEN_TARGET
${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py
${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/aten_op_template.h)
add_custom_target(__aten_op_header_gen
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h)
add_library(aten_op_header_gen INTERFACE)
add_dependencies(aten_op_header_gen __aten_op_header_gen)
endif()
add_custom_target(__aten_op_header_gen
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h)
add_library(aten_op_header_gen INTERFACE)
add_dependencies(aten_op_header_gen __aten_op_header_gen)
# Add source, includes, and libs to lists
list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS})
@ -68,39 +63,44 @@ if(BUILD_TORCH)
endif()
# ---[ Caffe2 build
if(BUILD_CAFFE2)
# Note: the folders that are being commented out have not been properly
# addressed yet.
add_subdirectory(proto)
add_subdirectory(contrib)
add_subdirectory(core)
add_subdirectory(predictor)
add_subdirectory(core/nomnigraph)
add_subdirectory(core/dispatch)
if (USE_NVRTC)
add_subdirectory(cuda_rtc)
endif()
add_subdirectory(db)
add_subdirectory(distributed)
# add_subdirectory(experiments) # note, we may remove this folder at some point
add_subdirectory(ideep)
add_subdirectory(image)
add_subdirectory(video)
add_subdirectory(mkl)
add_subdirectory(mobile)
add_subdirectory(mpi)
add_subdirectory(observers)
add_subdirectory(onnx)
add_subdirectory(operators)
add_subdirectory(operators/rnn)
add_subdirectory(opt)
add_subdirectory(perfkernels)
add_subdirectory(python)
add_subdirectory(queue)
add_subdirectory(sgd)
add_subdirectory(share)
# add_subdirectory(test) # todo: use caffe2_gtest_main instead of gtest_main because we will need to call GlobalInit
add_subdirectory(transforms)
# Note: the folders that are being commented out have not been properly
# addressed yet.
# TODO(orionr): Enable all of this for Windows DLL when we
# can figure out how to get it to build
if (MSVC AND BUILD_SHARED_LIBS)
add_subdirectory(utils)
else()
add_subdirectory(proto)
add_subdirectory(contrib)
add_subdirectory(core)
add_subdirectory(utils)
add_subdirectory(predictor)
add_subdirectory(core/nomnigraph)
add_subdirectory(core/dispatch)
if (USE_NVRTC)
add_subdirectory(cuda_rtc)
endif()
add_subdirectory(db)
add_subdirectory(distributed)
# add_subdirectory(experiments) # note, we may remove this folder at some point
add_subdirectory(ideep)
add_subdirectory(image)
add_subdirectory(video)
add_subdirectory(mkl)
add_subdirectory(mobile)
add_subdirectory(mpi)
add_subdirectory(observers)
add_subdirectory(onnx)
add_subdirectory(operators)
add_subdirectory(operators/rnn)
add_subdirectory(opt)
add_subdirectory(perfkernels)
add_subdirectory(python)
add_subdirectory(queue)
add_subdirectory(sgd)
add_subdirectory(share)
# add_subdirectory(test) # todo: use caffe2_gtest_main instead of gtest_main because we will need to call GlobalInit
add_subdirectory(transforms)
endif()
# Advanced: if we have white list specified, we will do intersections for all
@ -166,13 +166,15 @@ if (FALSE)
endif()
# ---[ List of libraries to link with
if (BUILD_CAFFE2)
add_library(caffe2_protos STATIC $<TARGET_OBJECTS:Caffe2_PROTO>)
add_dependencies(caffe2_protos Caffe2_PROTO)
# TODO(orionr): Enable all of this for Windows DLL when we
# can figure out how to get it to build
if (NOT (MSVC AND BUILD_SHARED_LIBS))
add_library(caffe2_protos STATIC $<TARGET_OBJECTS:Caffe2_PROTO>)
add_dependencies(caffe2_protos Caffe2_PROTO)
else()
# Do not include caffe2 or caffe protos, but rather have it only be
# a library to attach local protobuf.
add_library(caffe2_protos STATIC utils/dummy.cpp)
# Do not include caffe2 or caffe protos, but rather have it only be
# a library to attach local protobuf.
add_library(caffe2_protos STATIC utils/dummy.cpp)
endif()
# If we are going to link protobuf locally inside caffe2 libraries, what we will do is
# to create a helper static library that always contains libprotobuf source files, and
@ -207,7 +209,9 @@ target_link_libraries(caffe2_protos PUBLIC protobuf::libprotobuf)
# Compile exposed libraries.
list(APPEND Caffe2_CPU_SRCs $<TARGET_OBJECTS:c10>)
add_library(caffe2 ${Caffe2_CPU_SRCS})
target_compile_options(caffe2 PRIVATE "-fvisibility=hidden")
if (NOT WIN32)
target_compile_options(caffe2 PRIVATE "-fvisibility=hidden")
endif()
caffe2_interface_library(caffe2_protos caffe2_protos_whole)
target_link_libraries(caffe2 PRIVATE caffe2_protos_whole)
if (${CAFFE2_LINK_LOCAL_PROTOBUF})
@ -351,7 +355,7 @@ endif()
# ---[ Caffe2 HIP sources.
if(USE_ROCM)
# Call again since Caffe2_HIP_INCLUDES is extended with ATen include dirs.
# Get Compile Definitions from the directory (FindHIP.CMake bug)
# Get Compile Definitions from the directory (FindHIP.cmake bug)
get_directory_property(MY_DEFINITIONS COMPILE_DEFINITIONS)
if(MY_DEFINITIONS)
foreach(_item ${MY_DEFINITIONS})
@ -397,16 +401,33 @@ if ($ENV{WERROR})
endif()
# ---[ Test binaries.
if(BUILD_CAFFE2)
if (BUILD_TEST)
set(Caffe2_ALL_TEST_SRCS ${Caffe2_CPU_TEST_SRCS})
if (USE_CUDA)
list(APPEND Caffe2_ALL_TEST_SRCS ${Caffe2_GPU_TEST_SRCS})
endif()
# TODO(orionr): Enable all of this for Windows DLL when we
# can figure out how to get it to build
if (NOT (MSVC AND BUILD_SHARED_LIBS))
if (BUILD_TEST)
set(Caffe2_ALL_TEST_SRCS ${Caffe2_CPU_TEST_SRCS})
if (USE_CUDA)
list(APPEND Caffe2_ALL_TEST_SRCS ${Caffe2_GPU_TEST_SRCS})
endif()
foreach(test_src ${Caffe2_ALL_TEST_SRCS})
foreach(test_src ${Caffe2_ALL_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
target_link_libraries(${test_name} ${Caffe2_MAIN_LIBS} gtest_main)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.0)
target_compile_features(${test_name} PRIVATE cxx_range_for)
endif()
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
if (INSTALL_TEST)
install(TARGETS ${test_name} DESTINATION test)
endif()
endforeach()
if(USE_ROCM)
foreach(test_src ${Caffe2_HIP_TEST_SRCS})
set_source_files_properties(${test_src} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
hip_add_executable(${test_name} "${test_src}")
target_link_libraries(${test_name} ${Caffe2_MAIN_LIBS} gtest_main)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.0)
target_compile_features(${test_name} PRIVATE cxx_range_for)
@ -416,33 +437,12 @@ if(BUILD_CAFFE2)
install(TARGETS ${test_name} DESTINATION test)
endif()
endforeach()
if(USE_ROCM)
foreach(test_src ${Caffe2_HIP_TEST_SRCS})
set_source_files_properties(${test_src} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
get_filename_component(test_name ${test_src} NAME_WE)
hip_add_executable(${test_name} "${test_src}")
target_link_libraries(${test_name} ${Caffe2_MAIN_LIBS} gtest_main)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.0)
target_compile_features(${test_name} PRIVATE cxx_range_for)
endif()
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
if (INSTALL_TEST)
install(TARGETS ${test_name} DESTINATION test)
endif()
endforeach()
endif()
endif()
endif()
set(__aten_test_dir "test")
if(BUILD_CAFFE2)
# Aten tests should only run when Caffe2 is not built
set(__aten_test_dir "test/aten")
endif()
# Todo - Set up ATen tests for ROCm in an upcoming PR
if(NOT USE_ROCM)
set(__aten_test_dir "test/aten")
if (NOT USE_ROCM)
foreach(test_src ${ATen_CPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
@ -466,195 +466,197 @@ if(NOT USE_ROCM)
endif()
endif()
if(BUILD_CAFFE2)
if (BUILD_PYTHON)
# Python site-packages
# Get canonical directory for python site packages (relative to install
# location). It varies from system to system.
pycmd(PYTHON_SITE_PACKAGES "
from distutils import sysconfig
print(sysconfig.get_python_lib(prefix=''))
")
SET(PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES} PARENT_SCOPE) # for Summary
# ---[ Options.
SET(PYTHON_LIB_REL_PATH "${PYTHON_SITE_PACKAGES}" CACHE STRING "Python installation path (relative to CMake installation prefix)")
message(STATUS "Using ${PYTHON_LIB_REL_PATH} as python relative installation path")
# Python extension suffix
# Try to get from python through sysconfig.get_env_var('EXT_SUFFIX') first,
# fallback to ".pyd" if windows and ".so" for all others.
pycmd(PY_EXT_SUFFIX "
from distutils import sysconfig
ext_suffix = sysconfig.get_config_var('EXT_SUFFIX')
print(ext_suffix if ext_suffix else '')
")
if("${PY_EXT_SUFFIX}" STREQUAL "")
if (MSVC)
set(PY_EXT_SUFFIX ".pyd")
else()
set(PY_EXT_SUFFIX ".so")
endif()
endif()
# Allow different install locations for libcaffe2
# For setuptools installs (that all build Python), install libcaffe2 into
# site-packages, alongside the torch libraries. The pybind11 library needs
# an rpath to the torch library folder
# For cmake installs, including c++ only installs, install libcaffe2 into
# CMAKE_INSTALL_PREFIX/lib . The pybind11 library can have a hardcoded
# rpath
if(APPLE)
set(_rpath_portable_origin "@loader_path")
# TODO(orionr): Enable all of this for Windows DLL when we
# can figure out how to get it to build
if (NOT (MSVC AND BUILD_SHARED_LIBS))
if (BUILD_PYTHON)
# Python site-packages
# Get canonical directory for python site packages (relative to install
# location). It varies from system to system.
pycmd(PYTHON_SITE_PACKAGES "
from distutils import sysconfig
print(sysconfig.get_python_lib(prefix=''))
")
SET(PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES} PARENT_SCOPE) # for Summary
# ---[ Options.
SET(PYTHON_LIB_REL_PATH "${PYTHON_SITE_PACKAGES}" CACHE STRING "Python installation path (relative to CMake installation prefix)")
message(STATUS "Using ${PYTHON_LIB_REL_PATH} as python relative installation path")
# Python extension suffix
# Try to get from python through sysconfig.get_env_var('EXT_SUFFIX') first,
# fallback to ".pyd" if windows and ".so" for all others.
pycmd(PY_EXT_SUFFIX "
from distutils import sysconfig
ext_suffix = sysconfig.get_config_var('EXT_SUFFIX')
print(ext_suffix if ext_suffix else '')
")
if("${PY_EXT_SUFFIX}" STREQUAL "")
if (MSVC)
set(PY_EXT_SUFFIX ".pyd")
else()
set(_rpath_portable_origin $ORIGIN)
endif(APPLE)
set(caffe2_pybind11_rpath "${_rpath_portable_origin}")
if(${BUILDING_WITH_TORCH_LIBS})
# site-packages/caffe2/python/caffe2_pybind11_state
# site-packages/torch/lib
set(caffe2_pybind11_rpath "${_rpath_portable_origin}/../../torch/lib")
endif(${BUILDING_WITH_TORCH_LIBS})
set(PY_EXT_SUFFIX ".so")
endif()
endif()
# Allow different install locations for libcaffe2
# For setuptools installs (that all build Python), install libcaffe2 into
# site-packages, alongside the torch libraries. The pybind11 library needs
# an rpath to the torch library folder
# For cmake installs, including c++ only installs, install libcaffe2 into
# CMAKE_INSTALL_PREFIX/lib . The pybind11 library can have a hardcoded
# rpath
if(APPLE)
set(_rpath_portable_origin "@loader_path")
else()
set(_rpath_portable_origin $ORIGIN)
endif(APPLE)
set(caffe2_pybind11_rpath "${_rpath_portable_origin}")
if(${BUILDING_WITH_TORCH_LIBS})
# site-packages/caffe2/python/caffe2_pybind11_state
# site-packages/torch/lib
set(caffe2_pybind11_rpath "${_rpath_portable_origin}/../../torch/lib")
endif(${BUILDING_WITH_TORCH_LIBS})
# ---[ Python.
add_library(caffe2_pybind11_state MODULE ${Caffe2_CPU_PYTHON_SRCS})
set_target_properties(caffe2_pybind11_state PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
set_target_properties(caffe2_pybind11_state PROPERTIES PREFIX "" DEBUG_POSTFIX "")
set_target_properties(caffe2_pybind11_state PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
# ---[ Python.
add_library(caffe2_pybind11_state MODULE ${Caffe2_CPU_PYTHON_SRCS})
set_target_properties(caffe2_pybind11_state PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
set_target_properties(caffe2_pybind11_state PROPERTIES PREFIX "" DEBUG_POSTFIX "")
set_target_properties(caffe2_pybind11_state PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
if (APPLE)
set_target_properties(caffe2_pybind11_state PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif()
target_link_libraries(
caffe2_pybind11_state caffe2_library)
if (WIN32)
target_link_libraries(caffe2_pybind11_state ${PYTHON_LIBRARIES})
endif(WIN32)
# Install caffe2_pybind11_state(_gpu|hip) in site-packages/caffe2/python,
# so it needs an rpath to find libcaffe2
set_target_properties(
caffe2_pybind11_state PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/caffe2/python)
install(TARGETS caffe2_pybind11_state DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python")
set_target_properties(caffe2_pybind11_state PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}")
if(USE_CUDA)
add_library(caffe2_pybind11_state_gpu MODULE ${Caffe2_GPU_PYTHON_SRCS})
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES PREFIX "" DEBUG_POSTFIX "")
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
if (APPLE)
set_target_properties(caffe2_pybind11_state PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif()
target_link_libraries(
caffe2_pybind11_state caffe2_library)
caffe2_pybind11_state_gpu caffe2_library caffe2_gpu_library)
if (WIN32)
target_link_libraries(caffe2_pybind11_state ${PYTHON_LIBRARIES})
target_link_libraries(caffe2_pybind11_state_gpu ${PYTHON_LIBRARIES})
endif(WIN32)
# Install caffe2_pybind11_state(_gpu|hip) in site-packages/caffe2/python,
# so it needs an rpath to find libcaffe2
# Install with same rpath as non-gpu caffe2_pybind11_state
set_target_properties(
caffe2_pybind11_state PROPERTIES LIBRARY_OUTPUT_DIRECTORY
caffe2_pybind11_state_gpu PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/caffe2/python)
install(TARGETS caffe2_pybind11_state DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python")
set_target_properties(caffe2_pybind11_state PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}")
install(TARGETS caffe2_pybind11_state_gpu DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python")
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}")
endif()
if(USE_CUDA)
add_library(caffe2_pybind11_state_gpu MODULE ${Caffe2_GPU_PYTHON_SRCS})
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES PREFIX "" DEBUG_POSTFIX "")
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
if (APPLE)
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif()
target_link_libraries(
caffe2_pybind11_state_gpu caffe2_library caffe2_gpu_library)
if (WIN32)
target_link_libraries(caffe2_pybind11_state_gpu ${PYTHON_LIBRARIES})
endif(WIN32)
# Install with same rpath as non-gpu caffe2_pybind11_state
set_target_properties(
caffe2_pybind11_state_gpu PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/caffe2/python)
install(TARGETS caffe2_pybind11_state_gpu DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python")
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}")
if(USE_ROCM)
hip_add_library(caffe2_pybind11_state_hip MODULE ${Caffe2_HIP_PYTHON_SRCS})
set_target_properties(caffe2_pybind11_state_hip PROPERTIES LINKER_LANGUAGE HIP)
set_target_properties(caffe2_pybind11_state_hip PROPERTIES COMPILE_FLAGS "${HIP_HIPCC_FLAGS} -fvisibility=hidden")
set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "")
set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
if (APPLE)
set_target_properties(caffe2_pybind11_state_hip PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif()
target_link_libraries(
caffe2_pybind11_state_hip caffe2_library caffe2_hip_library)
if (WIN32)
target_link_libraries(caffe2_pybind11_state_hip ${PYTHON_LIBRARIES})
endif(WIN32)
if(USE_ROCM)
hip_add_library(caffe2_pybind11_state_hip MODULE ${Caffe2_HIP_PYTHON_SRCS})
set_target_properties(caffe2_pybind11_state_hip PROPERTIES LINKER_LANGUAGE HIP)
set_target_properties(caffe2_pybind11_state_hip PROPERTIES COMPILE_FLAGS "${HIP_HIPCC_FLAGS} -fvisibility=hidden")
set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "")
set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
if (APPLE)
set_target_properties(caffe2_pybind11_state_hip PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif()
target_link_libraries(
caffe2_pybind11_state_hip caffe2_library caffe2_hip_library)
if (WIN32)
target_link_libraries(caffe2_pybind11_state_hip ${PYTHON_LIBRARIES})
endif(WIN32)
# Install with same rpath as non-hip caffe2_pybind11_state
set_target_properties(
caffe2_pybind11_state_hip PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/caffe2/python)
install(TARGETS caffe2_pybind11_state_hip DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python")
set_target_properties(caffe2_pybind11_state_hip PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}")
endif()
# Install with same rpath as non-hip caffe2_pybind11_state
set_target_properties(
caffe2_pybind11_state_hip PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/caffe2/python)
install(TARGETS caffe2_pybind11_state_hip DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python")
set_target_properties(caffe2_pybind11_state_hip PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}")
endif()
if (MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
# If we are building under windows, we will copy the file from
# build/caffe2/python/{Debug,Release}/caffe2_pybind11_state.pyd
# to its parent folder so that we can do in-build execution.
add_custom_target(windows_python_copy_lib ALL)
add_dependencies(windows_python_copy_lib caffe2_pybind11_state)
if (MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
# If we are building under windows, we will copy the file from
# build/caffe2/python/{Debug,Release}/caffe2_pybind11_state.pyd
# to its parent folder so that we can do in-build execution.
add_custom_target(windows_python_copy_lib ALL)
add_dependencies(windows_python_copy_lib caffe2_pybind11_state)
add_custom_command(
TARGET windows_python_copy_lib POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:caffe2_pybind11_state>
${CMAKE_BINARY_DIR}/caffe2/python)
if (USE_CUDA)
add_dependencies(windows_python_copy_lib caffe2_pybind11_state_gpu)
add_custom_command(
TARGET windows_python_copy_lib POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:caffe2_pybind11_state>
$<TARGET_FILE:caffe2_pybind11_state_gpu>
${CMAKE_BINARY_DIR}/caffe2/python)
if (USE_CUDA)
add_dependencies(windows_python_copy_lib caffe2_pybind11_state_gpu)
add_custom_command(
TARGET windows_python_copy_lib POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:caffe2_pybind11_state_gpu>
${CMAKE_BINARY_DIR}/caffe2/python)
endif()
if (USE_ROCM)
add_dependencies(windows_python_copy_lib caffe2_pybind11_state_hip)
add_custom_command(
TARGET windows_python_copy_lib POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:caffe2_pybind11_state_hip>
${CMAKE_BINARY_DIR}/caffe2/python)
endif()
endif()
# Finally, Copy all python files to build directory
# Generate and create all needed __init__.py files, if they aren't already
# present in the current source tree.
message(STATUS "Automatically generating missing __init__.py files.")
caffe_autogen_init_py_files()
# Create a custom target that copies all python files.
file(GLOB_RECURSE PYTHON_SRCS RELATIVE ${PROJECT_SOURCE_DIR}
"${PROJECT_SOURCE_DIR}/caffe2/*.py")
add_custom_target(python_copy_files ALL)
if(MSVC OR CMAKE_GENERATOR MATCHES "Ninja")
# ninja fails when the command line is too long so we split
# the target into several. This would be beneficial for VS also
# since it build targets in parallel but not custom commands
foreach(python_src ${PYTHON_SRCS})
get_filename_component(dir ${python_src} DIRECTORY)
string(SHA1 name_hash "${python_src}")
# get_filename_component(name_we ${python_src} NAME_WE)
add_custom_target(python_copy_files_${name_hash}
COMMAND ${CMAKE_COMMAND} -E copy
${PROJECT_SOURCE_DIR}/${python_src} ${CMAKE_BINARY_DIR}/${dir})
add_dependencies(python_copy_files python_copy_files_${name_hash})
endforeach()
else()
foreach(python_src ${PYTHON_SRCS})
get_filename_component(dir ${python_src} DIRECTORY)
add_custom_command(
TARGET python_copy_files PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${PROJECT_SOURCE_DIR}/${python_src} ${CMAKE_BINARY_DIR}/${dir})
endforeach()
if (USE_ROCM)
add_dependencies(windows_python_copy_lib caffe2_pybind11_state_hip)
add_custom_command(
TARGET windows_python_copy_lib POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:caffe2_pybind11_state_hip>
${CMAKE_BINARY_DIR}/caffe2/python)
endif()
# Install commands
# Pick up static python files
install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe2 DESTINATION ${PYTHON_LIB_REL_PATH}
FILES_MATCHING PATTERN "*.py")
# Caffe proto files
install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe DESTINATION ${PYTHON_LIB_REL_PATH}
FILES_MATCHING PATTERN "*.py")
# Caffe2 proto files
install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe2 DESTINATION ${PYTHON_LIB_REL_PATH}
FILES_MATCHING PATTERN "*.py")
endif()
# Finally, Copy all python files to build directory
# Generate and create all needed __init__.py files, if they aren't already
# present in the current source tree.
message(STATUS "Automatically generating missing __init__.py files.")
caffe_autogen_init_py_files()
# Create a custom target that copies all python files.
file(GLOB_RECURSE PYTHON_SRCS RELATIVE ${PROJECT_SOURCE_DIR}
"${PROJECT_SOURCE_DIR}/caffe2/*.py")
add_custom_target(python_copy_files ALL)
if(MSVC OR CMAKE_GENERATOR MATCHES "Ninja")
# ninja fails when the command line is too long so we split
# the target into several. This would be beneficial for VS also
# since it build targets in parallel but not custom commands
foreach(python_src ${PYTHON_SRCS})
get_filename_component(dir ${python_src} DIRECTORY)
string(SHA1 name_hash "${python_src}")
# get_filename_component(name_we ${python_src} NAME_WE)
add_custom_target(python_copy_files_${name_hash}
COMMAND ${CMAKE_COMMAND} -E copy
${PROJECT_SOURCE_DIR}/${python_src} ${CMAKE_BINARY_DIR}/${dir})
add_dependencies(python_copy_files python_copy_files_${name_hash})
endforeach()
else()
foreach(python_src ${PYTHON_SRCS})
get_filename_component(dir ${python_src} DIRECTORY)
add_custom_command(
TARGET python_copy_files PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${PROJECT_SOURCE_DIR}/${python_src} ${CMAKE_BINARY_DIR}/${dir})
endforeach()
endif()
# Install commands
# Pick up static python files
install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe2 DESTINATION ${PYTHON_LIB_REL_PATH}
FILES_MATCHING PATTERN "*.py")
# Caffe proto files
install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe DESTINATION ${PYTHON_LIB_REL_PATH}
FILES_MATCHING PATTERN "*.py")
# Caffe2 proto files
install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe2 DESTINATION ${PYTHON_LIB_REL_PATH}
FILES_MATCHING PATTERN "*.py")
endif()
endif()
# Finally, set the Caffe2_MAIN_LIBS variable in the parent scope.

View File

@ -59,7 +59,10 @@ CAFFE2_DEFINE_int(
namespace caffe2 {
thread_local ThreadLocalCUDAObjects CUDAContext::cuda_objects_;
ThreadLocalCUDAObjects& CUDAContext::getCudaObjects() {
static thread_local ThreadLocalCUDAObjects cuda_objects_;
return cuda_objects_;
}
// TODO(jiayq): these variables shouldn't be currently accessed during static
// initialization. We should consider moving them to a Mayer's singleton to
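
This hunk replaces the static thread_local data member with a function-local static behind an accessor, the "Meyer's singleton" style the TODO above refers to. A stripped-down sketch of the pattern, with hypothetical names, is below; the object is constructed lazily on first use in each thread, and no thread_local data member has to be exported from the shared library (the presumed motivation, not stated in the diff). The AsyncNetBase stream_counters_ change later in this commit follows the same pattern.

#include <iostream>
#include <vector>

// Hypothetical stand-in for ThreadLocalCUDAObjects.
struct ThreadLocalObjects {
  std::vector<int> handles;
};

class Context {
 public:
  void use() {
    // Every access goes through the accessor instead of a class-level
    // static member, so the definition stays inside one translation unit.
    getObjects().handles.push_back(42);
  }

  static ThreadLocalObjects& getObjects() {
    // Function-local static: constructed on first use, once per thread.
    static thread_local ThreadLocalObjects objects;
    return objects;
  }
};

int main() {
  Context ctx;
  ctx.use();
  std::cout << Context::getObjects().handles.size() << "\n";  // prints 1
  return 0;
}
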

View File

@ -175,7 +175,7 @@ class CAFFE2_API CUDAContext final : public BaseContext {
}
void FinishDeviceComputation() override {
cudaStreamSynchronize(cuda_objects_.GetStream(gpu_id_, stream_id_));
cudaStreamSynchronize(getCudaObjects().GetStream(gpu_id_, stream_id_));
cudaError_t error = cudaGetLastError();
if (error != cudaSuccess) {
CAFFE_THROW("Encountered CUDA error: ", cudaGetErrorString(error));
@ -195,16 +195,16 @@ class CAFFE2_API CUDAContext final : public BaseContext {
}
static cudaStream_t cuda_stream(int gpu_id, int stream_id) {
return cuda_objects_.GetStream(gpu_id, stream_id);
return getCudaObjects().GetStream(gpu_id, stream_id);
}
cublasHandle_t cublas_handle() {
return cuda_objects_.GetHandle(gpu_id_, stream_id_);
return getCudaObjects().GetHandle(gpu_id_, stream_id_);
}
#ifdef CAFFE2_USE_CUDNN
cudnnHandle_t cudnn_handle() {
return cuda_objects_.GetCudnnHandle(gpu_id_, stream_id_);
return getCudaObjects().GetCudnnHandle(gpu_id_, stream_id_);
}
#endif // CAFFE2_USE_CUDNN
@ -242,7 +242,7 @@ class CAFFE2_API CUDAContext final : public BaseContext {
src,
nbytes,
cudaMemcpyDefault,
cuda_objects_.GetStream(gpu_id_, stream_id_)));
getCudaObjects().GetStream(gpu_id_, stream_id_)));
}
void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override {
@ -302,7 +302,7 @@ class CAFFE2_API CUDAContext final : public BaseContext {
int stream_id_ = 0;
int random_seed_;
curandGenerator_t curand_generator_{nullptr};
static thread_local ThreadLocalCUDAObjects cuda_objects_;
static ThreadLocalCUDAObjects& getCudaObjects();
};
// For the CPU context, we also allow a (probably expensive) function

View File

@ -51,7 +51,7 @@ typedef void (*EventResetFunction)(Event*);
typedef std::function<void()> EventCallbackFunction;
typedef void (*EventSetCallbackFunction)(Event*, EventCallbackFunction);
class Event {
class CAFFE2_API Event {
public:
explicit Event(const DeviceOption& option)
: event_(), type_(option.device_type()), option_(option) {
@ -170,21 +170,20 @@ class Event {
int type_;
DeviceOption option_;
CAFFE2_API static EventCreateFunction event_creator_[MaxDeviceTypes];
CAFFE2_API static EventRecordFunction event_recorder_[MaxDeviceTypes];
CAFFE2_API static EventWaitFunction event_waiter_[MaxDeviceTypes]
[MaxDeviceTypes];
CAFFE2_API static EventFinishFunction event_finisher_[MaxDeviceTypes];
static EventCreateFunction event_creator_[MaxDeviceTypes];
static EventRecordFunction event_recorder_[MaxDeviceTypes];
static EventWaitFunction event_waiter_[MaxDeviceTypes]
[MaxDeviceTypes];
static EventFinishFunction event_finisher_[MaxDeviceTypes];
CAFFE2_API static EventQueryFunction event_querier_[MaxDeviceTypes];
CAFFE2_API static EventErrorMessageFunction
static EventQueryFunction event_querier_[MaxDeviceTypes];
static EventErrorMessageFunction
event_err_msg_getter_[MaxDeviceTypes];
CAFFE2_API static EventSetFinishedFunction
static EventSetFinishedFunction
event_finished_setter_[MaxDeviceTypes];
CAFFE2_API static EventResetFunction event_resetter_[MaxDeviceTypes];
static EventResetFunction event_resetter_[MaxDeviceTypes];
CAFFE2_API static EventSetCallbackFunction
event_callback_setter_[MaxDeviceTypes];
static EventSetCallbackFunction event_callback_setter_[MaxDeviceTypes];
template <int d>
friend struct EventCreateFunctionRegisterer;

View File

@ -9,7 +9,7 @@ namespace caffe2 {
#ifdef CAFFE2_USE_GFLAGS
void SetUsageMessage(const string& str) {
CAFFE2_EXPORT void SetUsageMessage(const string& str) {
if (UsageMessage() != nullptr) {
// Usage message has already been set, so we will simply return.
return;
@ -17,16 +17,16 @@ void SetUsageMessage(const string& str) {
gflags::SetUsageMessage(str);
}
const char* UsageMessage() {
CAFFE2_EXPORT const char* UsageMessage() {
return gflags::ProgramUsage();
}
bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
CAFFE2_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
if (*pargc == 0) return true;
return gflags::ParseCommandLineFlags(pargc, pargv, true);
}
bool CommandLineFlagsHasBeenParsed() {
CAFFE2_EXPORT bool CommandLineFlagsHasBeenParsed() {
// There is no way we query gflags right now, so we will simply return true.
return true;
}
@ -49,10 +49,10 @@ static string gUsageMessage = "(Usage message not set.)";
}
void SetUsageMessage(const string& str) { gUsageMessage = str; }
const char* UsageMessage() { return gUsageMessage.c_str(); }
CAFFE2_EXPORT void SetUsageMessage(const string& str) { gUsageMessage = str; }
CAFFE2_EXPORT const char* UsageMessage() { return gUsageMessage.c_str(); }
bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
CAFFE2_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
if (*pargc == 0) return true;
char** argv = *pargv;
bool success = true;
@ -136,18 +136,18 @@ bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
return success;
}
bool CommandLineFlagsHasBeenParsed() {
CAFFE2_EXPORT bool CommandLineFlagsHasBeenParsed() {
return gCommandLineFlagsParsed;
}
template <>
bool Caffe2FlagParser::Parse<string>(const string& content, string* value) {
CAFFE2_EXPORT bool Caffe2FlagParser::Parse<string>(const string& content, string* value) {
*value = content;
return true;
}
template <>
bool Caffe2FlagParser::Parse<int>(const string& content, int* value) {
CAFFE2_EXPORT bool Caffe2FlagParser::Parse<int>(const string& content, int* value) {
try {
*value = std::atoi(content.c_str());
return true;
@ -159,7 +159,7 @@ bool Caffe2FlagParser::Parse<int>(const string& content, int* value) {
}
template <>
bool Caffe2FlagParser::Parse<int64_t>(const string& content, int64_t* value) {
CAFFE2_EXPORT bool Caffe2FlagParser::Parse<int64_t>(const string& content, int64_t* value) {
try {
static_assert(sizeof(long long) == sizeof(int64_t), "");
#ifdef __ANDROID__
@ -177,7 +177,7 @@ bool Caffe2FlagParser::Parse<int64_t>(const string& content, int64_t* value) {
}
template <>
bool Caffe2FlagParser::Parse<double>(const string& content, double* value) {
CAFFE2_EXPORT bool Caffe2FlagParser::Parse<double>(const string& content, double* value) {
try {
*value = std::atof(content.c_str());
return true;
@ -190,7 +190,7 @@ bool Caffe2FlagParser::Parse<double>(const string& content, double* value) {
}
template <>
bool Caffe2FlagParser::Parse<bool>(const string& content, bool* value) {
CAFFE2_EXPORT bool Caffe2FlagParser::Parse<bool>(const string& content, bool* value) {
if (content == "false" || content == "False" || content == "FALSE" ||
content == "0") {
*value = false;
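
Many hunks in this commit annotate free-function definitions and template specializations with CAFFE2_EXPORT (or AT_CORE_EXPORT / AT_CUDA_API). The macro definitions themselves are not part of this diff; the sketch below shows a typical export-macro scheme such annotations usually rely on, with hypothetical names (MYLIB_EXPORT, MYLIB_BUILD_MAIN_LIB), not the actual caffe2/ATen headers.

// Illustrative export-macro pattern; MYLIB_EXPORT and MYLIB_BUILD_MAIN_LIB are
// hypothetical names. When building the shared library itself, compile with
// -DMYLIB_BUILD_MAIN_LIB so the definition below is exported; consumers
// compile without it and see dllimport.
#include <string>

#if defined(_WIN32)
#  if defined(MYLIB_BUILD_MAIN_LIB)
#    define MYLIB_EXPORT __declspec(dllexport)
#  else
#    define MYLIB_EXPORT __declspec(dllimport)
#  endif
#else
// Non-Windows builds use ELF symbol visibility instead; relevant because the
// CMake changes above build the caffe2 target with -fvisibility=hidden.
#  define MYLIB_EXPORT __attribute__((visibility("default")))
#endif

// Annotating the definition keeps the symbol usable across the DLL/.so
// boundary even when default visibility is hidden.
MYLIB_EXPORT std::string DeviceName(int id) {
  return "device_" + std::to_string(id);
}
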

View File

@ -92,7 +92,7 @@ namespace gflags = google;
#define CAFFE2_DEFINE_int(name, default_value, help_str) \
CAFFE2_GFLAGS_DEF_WRAPPER(int32, gflags::int32, name, default_value, help_str)
#define CAFFE2_DEFINE_int64(name, default_value, help_str) \
CAFFE2_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str)
CAFFE2_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str)
#define CAFFE2_DEFINE_double(name, default_value, help_str) \
CAFFE2_GFLAGS_DEF_WRAPPER(double, double, name, default_value, help_str)
#define CAFFE2_DEFINE_bool(name, default_value, help_str) \

View File

@ -55,7 +55,10 @@ CAFFE2_DEFINE_bool(
namespace caffe2 {
thread_local std::vector<int> AsyncNetBase::stream_counters_;
std::vector<int>& AsyncNetBase::getStreamCounters() {
static thread_local std::vector<int> stream_counters_;
return stream_counters_;
}
AsyncNetBase::AsyncNetBase(
const std::shared_ptr<const NetDef>& net_def,
@ -172,12 +175,12 @@ int AsyncNetBase::stream(int task_id) {
if (device_option.device_type() == CUDA) {
int gpu_id = device_option.cuda_gpu_id();
CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
if ((unsigned)gpu_id >= stream_counters_.size()) {
stream_counters_.resize(gpu_id + 1, 0);
if ((unsigned)gpu_id >= getStreamCounters().size()) {
getStreamCounters().resize(gpu_id + 1, 0);
}
do {
stream_id = stream_counters_[gpu_id]++;
stream_counters_[gpu_id] %= streams_per_gpu_;
stream_id = getStreamCounters().at(gpu_id)++;
getStreamCounters().at(gpu_id) %= streams_per_gpu_;
} while (check_stream_status_ && !isStreamFree(task_id, stream_id));
}
return stream_id;

View File

@ -100,7 +100,7 @@ class CAFFE2_API AsyncNetBase : public NetBase {
PoolsMap;
PoolsMap cpu_pools_;
PoolsMap gpu_pools_;
static thread_local std::vector<int> stream_counters_;
static std::vector<int>& getStreamCounters();
int num_workers_;
// Exception/error handling

View File

@ -3,7 +3,10 @@ file(GLOB_RECURSE NOMNI_SRCS *.cc)
file(GLOB_RECURSE NOMNI_TEST_SRCS *test.cc)
exclude(NOMNI_SRCS "${NOMNI_SRCS}" "${NOMNI_TEST_SRCS}")
# TODO(orionr): The nomnigraph source should likely just be included
# in the Caffe2 source list, since this won't live separately
add_library(nomnigraph STATIC "${NOMNI_SRCS}")
target_compile_options(nomnigraph PRIVATE "-DCAFFE2_BUILD_MAIN_LIB")
add_dependencies(nomnigraph Caffe2_PROTO)
target_include_directories(nomnigraph PUBLIC

View File

@ -13,6 +13,7 @@ struct GraphWrapper {
struct NodeWrapper {
using NodeRef = typename Graph<T, U...>::NodeRef;
NodeWrapper(NodeRef n) : node(n) {}
NodeWrapper() = default;
NodeRef node;
int Index = -1;
int LowLink = -1;

View File

@ -25,7 +25,7 @@ class CAFFE2_API Value {
class CAFFE2_API Data : public Value {
public:
Data() : Value(ValueKind::Data) {}
CAFFE2_API static bool classof(const Value* V) {
static bool classof(const Value* V) {
return V->getKind() == ValueKind::Data;
}
virtual ~Data() = default;
@ -54,7 +54,7 @@ class CAFFE2_API Instruction : public Value {
};
Instruction() : Value(ValueKind::Instruction), op_(Opcode::Generic) {}
Instruction(Opcode op) : Value(ValueKind::Instruction), op_(op) {}
CAFFE2_API static bool classof(const Value* V) {
static bool classof(const Value* V) {
return V->getKind() == ValueKind::Instruction;
}
virtual ~Instruction() = default;

View File

@ -131,7 +131,7 @@ using BasicBlockType = typename ControlFlowGraphImpl<G>::bbType;
/// \brief Converts graph to SSA representation. Modifies the graph
/// by inserting versions and phi nodes.
template <typename Phi, typename G>
CAFFE2_API void addSSA(G* dfg, ControlFlowGraph<G>* cfg) {
CAFFE2_EXPORT void addSSA(G* dfg, ControlFlowGraph<G>* cfg) {
static_assert(
std::is_base_of<Instruction, Phi>::value,
"Phi type must be derived from Instruction.");

View File

@ -168,7 +168,7 @@ class CAFFE2_API Tensor : public NeuralNetData {
: NeuralNetData(NNDataKind::Tensor),
name_(name),
type_(DataType::Generic) {}
CAFFE2_API static bool classof(const NeuralNetData* D) {
static bool classof(const NeuralNetData* D) {
return D->getKind() == NNDataKind::Tensor;
}
@ -195,10 +195,10 @@ class CAFFE2_API Tensor : public NeuralNetData {
};
#define NOMNIGRAPH_DEFINE_NN_RTTI(op) \
CAFFE2_API static bool classof(const NeuralNetOperator* N) { \
static bool classof(const NeuralNetOperator* N) { \
return N->getKind() == NNKind::op; \
} \
CAFFE2_API static bool classof(const Value* N) { \
static bool classof(const Value* N) { \
if (isa<NeuralNetOperator>(N)) { \
return dyn_cast<NeuralNetOperator>(N)->getKind() == NNKind::op; \
} \
@ -342,7 +342,7 @@ inline T* get(N n) {
}
template <typename T, typename G>
CAFFE2_API std::vector<typename G::NodeRef> nodeIterator(G& g) {
std::vector<typename G::NodeRef> nodeIterator(G& g) {
std::vector<typename G::NodeRef> out;
for (auto node : g.getMutableNodes()) {
if (!is<T>(node)) {
@ -354,7 +354,7 @@ CAFFE2_API std::vector<typename G::NodeRef> nodeIterator(G& g) {
}
template <typename T, typename G>
CAFFE2_API std::vector<std::pair<T*, typename G::NodeRef>> dataIterator(G& g) {
std::vector<std::pair<T*, typename G::NodeRef>> dataIterator(G& g) {
std::vector<std::pair<T*, typename G::NodeRef>> out;
for (auto node : g.getMutableNodes()) {
if (!is<T>(node)) {
@ -367,7 +367,7 @@ CAFFE2_API std::vector<std::pair<T*, typename G::NodeRef>> dataIterator(G& g) {
}
template <typename T, typename... Args>
CAFFE2_API void insertOp(
void insertOp(
NNGraph& g,
NNGraph::NodeRef a,
NNGraph::NodeRef b,
@ -397,7 +397,7 @@ CAFFE2_API void insertOp(
}
template <typename NewT, typename OldT>
CAFFE2_API NNGraph::NodeRef convertNode(NNGraph& g, NNGraph::NodeRef node) {
NNGraph::NodeRef convertNode(NNGraph& g, NNGraph::NodeRef node) {
assert(is<OldT>(node) && "Cannot get type from node.");
NeuralNetOperator* nnOpPtr =
@ -428,14 +428,19 @@ template <NNGraph* G>
struct CAFFE2_API NodeHelper {};
struct CAFFE2_API NNNodeMatchCriteria {
const std::function<bool(NNGraph::NodeRef)> predicate;
const std::string debugString;
std::function<bool(NNGraph::NodeRef)> predicate;
std::string debugString;
NNNodeMatchCriteria(
const std::function<bool(NNGraph::NodeRef)>& predicate,
const std::string& debugString = "No debug string specified")
: predicate(predicate), debugString(debugString){};
NNNodeMatchCriteria() = default;
NNNodeMatchCriteria(const NNNodeMatchCriteria&) = default;
NNNodeMatchCriteria& operator=(const NNNodeMatchCriteria&) = default;
NNNodeMatchCriteria(NNNodeMatchCriteria&&) = default;
NNNodeMatchCriteria andCriteria(const NNNodeMatchCriteria& other) {
auto thisPredicate = predicate;
auto otherPredicate = other.predicate;
@ -463,7 +468,7 @@ CAFFE2_API NNNodeMatchCriteria criteriaSingleOutputAndConsumer();
CAFFE2_API NNNodeMatchCriteria criteriaSingleConsumer();
template <typename NodeType>
CAFFE2_API NNNodeMatchCriteria matchOp(const std::string& debugString = "matchOp") {
NNNodeMatchCriteria matchOp(const std::string& debugString = "matchOp") {
return NNNodeMatchCriteria(
[](NNGraph::NodeRef nodeRef) { return is<NodeType>(nodeRef); },
debugString);
@ -472,7 +477,7 @@ CAFFE2_API NNNodeMatchCriteria matchOp(const std::string& debugString = "matchOp
CAFFE2_API NNNodeMatchCriteria matchTensor();
template <typename NodeType>
CAFFE2_API NNNodeMatchCriteria matchOp(
NNNodeMatchCriteria matchOp(
const std::function<bool(const NodeType&)> predicate,
const std::string& debugString = "matchOpWithPredicate") {
return NNNodeMatchCriteria(
@ -485,7 +490,7 @@ CAFFE2_API NNNodeMatchCriteria matchOp(
};
struct CAFFE2_API NNNodeMatch {
CAFFE2_API static bool isMatch(
static bool isMatch(
const NNGraph::NodeRef& node,
const NNNodeMatchCriteria& criteria) {
return criteria.predicate(node);

View File

@ -1,6 +1,7 @@
#ifndef NOM_TRANFORMATIONS_SUBGRAPH_MATCHER_H
#define NOM_TRANFORMATIONS_SUBGRAPH_MATCHER_H
#include "caffe2/core/common.h"
#include "nomnigraph/Graph/Graph.h"
#include <functional>
@ -28,7 +29,7 @@ namespace matcher {
*/
template <typename NodeMatchCriteria>
class MatchNode {
class CAFFE2_API MatchNode {
public:
static const int kStarCount = -1;
MatchNode(
@ -41,6 +42,11 @@ class MatchNode {
count_(count),
nonTerminal_(nonTerminal) {}
MatchNode() = default;
MatchNode(const MatchNode&) = default;
MatchNode& operator=(const MatchNode&) = default;
MatchNode(MatchNode&&) = default;
NodeMatchCriteria getCriteria() const {
return criteria_;
}
@ -58,10 +64,10 @@ class MatchNode {
}
private:
const NodeMatchCriteria criteria_;
const bool includeInSubgraph_;
const int count_;
const bool nonTerminal_;
NodeMatchCriteria criteria_;
bool includeInSubgraph_;
int count_;
bool nonTerminal_;
};
template <typename NodeMatchCriteria>
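
The NNNodeMatchCriteria and MatchNode hunks drop const from data members and explicitly default the copy and move operations. A plausible reason (an assumption on my part, not stated in the diff) is that a const data member makes the implicitly-declared copy assignment operator deleted, so it cannot simply be defaulted; a minimal sketch:

// Hypothetical miniature of the MatchNode situation, not the real class.
struct WithConstMember {
  const int count;  // const member: copy assignment is implicitly deleted
};

struct WithPlainMember {
  int count;
  WithPlainMember() = default;
  WithPlainMember(const WithPlainMember&) = default;
  WithPlainMember& operator=(const WithPlainMember&) = default;  // now legal
  WithPlainMember(WithPlainMember&&) = default;
};

int main() {
  WithPlainMember a{}, b{};
  a = b;                  // fine
  // WithConstMember c{1}, d{2};
  // c = d;               // error: use of deleted copy assignment operator
  return 0;
}
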

View File

@ -328,7 +328,7 @@ class CAFFE2_API OpSchema {
return inplace_enforced_(x, y);
}
friend std::ostream& operator<<(std::ostream& out, const OpSchema& schema);
CAFFE2_API friend std::ostream& operator<<(std::ostream& out, const OpSchema& schema);
const std::vector<Argument>& args() const {
return args_;

View File

@ -152,7 +152,8 @@ class CAFFE2_API Registerer {
*/
#define CAFFE_DECLARE_TYPED_REGISTRY( \
RegistryName, SrcType, ObjectType, PtrType, ...) \
Registry<SrcType, PtrType<ObjectType>, ##__VA_ARGS__>* RegistryName(); \
CAFFE2_EXPORT Registry<SrcType, PtrType<ObjectType>, ##__VA_ARGS__>* \
RegistryName(); \
typedef Registerer<SrcType, PtrType<ObjectType>, ##__VA_ARGS__> \
Registerer##RegistryName;

View File

@ -948,7 +948,15 @@ class CAFFE2_API UndefinedTensorImpl final : public TensorImpl {
UndefinedTensorImpl() : TensorImpl(CPU){};
public:
static constexpr TensorImpl* singleton() {
// Without this, we get:
// error: identifier "at::UndefinedTensor::_singleton" is undefined in device code
// (ostensibly because the constexpr tricks MSVC into trying to compile this
// function for device as well).
#ifdef _WIN32
static inline TensorImpl * singleton() {
#else
static constexpr inline TensorImpl * singleton() {
#endif
return &singleton_;
}

View File

@ -31,14 +31,14 @@ op = core.CreateOperator(
["is_empty"],
)
# Use a not-empty tensor
// Use a not-empty tensor
workspace.FeedBlob("tensor", np.random.randn(2, 2).astype(np.float32))
print("tensor:\n", workspace.FetchBlob("tensor"))
workspace.RunOperatorOnce(op)
print("is_empty: ", workspace.FetchBlob("is_empty"),"\n")
# Use an empty tensor
// Use an empty tensor
workspace.FeedBlob("tensor", np.empty(0))
print("tensor:\n", workspace.FetchBlob("tensor"))

View File

@ -17,7 +17,7 @@ struct CAFFE2_API Token {
class CAFFE2_API TokenizedString {
// holder for strings that have been modified
std::vector<std::unique_ptr<std::string>> modifiedStrings_;
std::vector<std::shared_ptr<std::string>> modifiedStrings_;
std::vector<Token> tokens_;
int lastDelim_;

View File

@ -46,7 +46,7 @@ public:
return &OpDef;
}
CAFFE2_API static bool classof(const Annotation *A) {
static bool classof(const Annotation *A) {
return A->getKind() == AnnotationKind::Caffe2;
}

View File

@ -1,14 +1,13 @@
list(APPEND Caffe2_CPU_SRCS
utils/proto_wrap.cc)
# ---[ only support the above when full caffe2 isn't built
if (NOT BUILD_CAFFE2)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE)
return()
# TODO(orionr): Enable all of this for Windows DLL when we
# can figure out how to get it to build
if (MSVC AND BUILD_SHARED_LIBS)
list(APPEND Caffe2_CPU_SRCS utils/proto_wrap.cc)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
return()
endif()
list(APPEND Caffe2_CPU_SRCS
utils/proto_wrap.cc
utils/proto_utils.cc
utils/murmur_hash3.cc
utils/smart_tensor_printer.cc

View File

@ -21,11 +21,11 @@ using ::google::protobuf::MessageLite;
namespace caffe2 {
std::string DeviceTypeName(const int32_t& d) {
CAFFE2_EXPORT std::string DeviceTypeName(const int32_t& d) {
return at::DeviceTypeName(static_cast<at::DeviceType>(d));
}
int DeviceId(const DeviceOption& option) {
CAFFE2_EXPORT int DeviceId(const DeviceOption& option) {
switch (option.device_type()) {
case CPU:
return option.numa_node_id();
@ -40,7 +40,7 @@ int DeviceId(const DeviceOption& option) {
}
}
bool IsSameDevice(const DeviceOption& lhs, const DeviceOption& rhs) {
CAFFE2_EXPORT bool IsSameDevice(const DeviceOption& lhs, const DeviceOption& rhs) {
return (
lhs.device_type() == rhs.device_type() &&
lhs.cuda_gpu_id() == rhs.cuda_gpu_id() &&
@ -49,7 +49,7 @@ bool IsSameDevice(const DeviceOption& lhs, const DeviceOption& rhs) {
lhs.numa_node_id() == rhs.numa_node_id());
}
bool ReadStringFromFile(const char* filename, string* str) {
CAFFE2_EXPORT bool ReadStringFromFile(const char* filename, string* str) {
std::ifstream ifs(filename, std::ios::in);
if (!ifs) {
VLOG(1) << "File cannot be opened: " << filename
@ -64,7 +64,7 @@ bool ReadStringFromFile(const char* filename, string* str) {
return true;
}
bool WriteStringToFile(const string& str, const char* filename) {
CAFFE2_EXPORT bool WriteStringToFile(const string& str, const char* filename) {
std::ofstream ofs(filename, std::ios::out | std::ios::trunc);
if (!ofs.is_open()) {
VLOG(1) << "File cannot be created: " << filename
@ -102,11 +102,11 @@ class IfstreamInputStream : public ::google::protobuf::io::CopyingInputStream {
};
} // namespace
string ProtoDebugString(const MessageLite& proto) {
CAFFE2_EXPORT string ProtoDebugString(const MessageLite& proto) {
return proto.SerializeAsString();
}
bool ParseProtoFromLargeString(const string& str, MessageLite* proto) {
CAFFE2_EXPORT bool ParseProtoFromLargeString(const string& str, MessageLite* proto) {
::google::protobuf::io::ArrayInputStream input_stream(str.data(), str.size());
::google::protobuf::io::CodedInputStream coded_stream(&input_stream);
// Set PlanDef message size limit to 2G.
@ -114,7 +114,7 @@ bool ParseProtoFromLargeString(const string& str, MessageLite* proto) {
return proto->ParseFromCodedStream(&coded_stream);
}
bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
CAFFE2_EXPORT bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
::google::protobuf::io::CopyingInputStreamAdaptor stream(
new IfstreamInputStream(filename));
stream.SetOwnsCopyingStream(true);
@ -125,7 +125,7 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
return proto->ParseFromCodedStream(&coded_stream);
}
void WriteProtoToBinaryFile(
CAFFE2_EXPORT void WriteProtoToBinaryFile(
const MessageLite& /*proto*/,
const char* /*filename*/) {
LOG(FATAL) << "Not implemented yet.";
@ -144,16 +144,16 @@ using ::google::protobuf::io::CodedOutputStream;
using ::google::protobuf::Message;
namespace TextFormat {
bool ParseFromString(const string& spec, Message* proto) {
CAFFE2_EXPORT bool ParseFromString(const string& spec, Message* proto) {
return ::google::protobuf::TextFormat::ParseFromString(spec, proto);
}
} // namespace TextFormat
string ProtoDebugString(const Message& proto) {
CAFFE2_EXPORT string ProtoDebugString(const Message& proto) {
return proto.ShortDebugString();
}
bool ParseProtoFromLargeString(const string& str, Message* proto) {
CAFFE2_EXPORT bool ParseProtoFromLargeString(const string& str, Message* proto) {
::google::protobuf::io::ArrayInputStream input_stream(str.data(), str.size());
::google::protobuf::io::CodedInputStream coded_stream(&input_stream);
// Set PlanDef message size limit to 2G.
@ -161,7 +161,7 @@ bool ParseProtoFromLargeString(const string& str, Message* proto) {
return proto->ParseFromCodedStream(&coded_stream);
}
bool ReadProtoFromTextFile(const char* filename, Message* proto) {
CAFFE2_EXPORT bool ReadProtoFromTextFile(const char* filename, Message* proto) {
int fd = open(filename, O_RDONLY);
CAFFE_ENFORCE_NE(fd, -1, "File not found: ", filename);
FileInputStream* input = new FileInputStream(fd);
@ -171,7 +171,7 @@ bool ReadProtoFromTextFile(const char* filename, Message* proto) {
return success;
}
void WriteProtoToTextFile(const Message& proto, const char* filename) {
CAFFE2_EXPORT void WriteProtoToTextFile(const Message& proto, const char* filename) {
int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
FileOutputStream* output = new FileOutputStream(fd);
CAFFE_ENFORCE(google::protobuf::TextFormat::Print(proto, output));
@ -179,7 +179,7 @@ void WriteProtoToTextFile(const Message& proto, const char* filename) {
close(fd);
}
bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
CAFFE2_EXPORT bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
#if defined (_MSC_VER) // for MSC compiler binary flag needs to be specified
int fd = open(filename, O_RDONLY | O_BINARY);
#else
@ -198,7 +198,7 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
return success;
}
void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
CAFFE2_EXPORT void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
CAFFE_ENFORCE_NE(
fd, -1, "File cannot be created: ", filename, " error number: ", errno);
@ -214,7 +214,7 @@ void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
#endif // CAFFE2_USE_LITE_PROTO
ArgumentHelper::ArgumentHelper(const OperatorDef& def) {
CAFFE2_EXPORT ArgumentHelper::ArgumentHelper(const OperatorDef& def) {
for (auto& arg : def.arg()) {
if (arg_map_.count(arg.name())) {
if (arg.SerializeAsString() != arg_map_[arg.name()].SerializeAsString()) {
@ -235,7 +235,7 @@ ArgumentHelper::ArgumentHelper(const OperatorDef& def) {
}
}
ArgumentHelper::ArgumentHelper(const NetDef& netdef) {
CAFFE2_EXPORT ArgumentHelper::ArgumentHelper(const NetDef& netdef) {
for (auto& arg : netdef.arg()) {
CAFFE_ENFORCE(
arg_map_.count(arg.name()) == 0,
@ -245,7 +245,7 @@ ArgumentHelper::ArgumentHelper(const NetDef& netdef) {
}
}
bool ArgumentHelper::HasArgument(const string& name) const {
CAFFE2_EXPORT bool ArgumentHelper::HasArgument(const string& name) const {
return arg_map_.count(name);
}
@ -270,7 +270,7 @@ std::ostream& operator<<(std::ostream& output, const NetDef& n) {
#define INSTANTIATE_GET_SINGLE_ARGUMENT( \
T, fieldname, enforce_lossless_conversion) \
template <> \
T ArgumentHelper::GetSingleArgument<T>( \
CAFFE2_EXPORT T ArgumentHelper::GetSingleArgument<T>( \
const string& name, const T& default_value) const { \
if (arg_map_.count(name) == 0) { \
VLOG(1) << "Using default parameter value " << default_value \
@ -297,7 +297,7 @@ std::ostream& operator<<(std::ostream& output, const NetDef& n) {
return static_cast<T>(value); \
} \
template <> \
bool ArgumentHelper::HasSingleArgumentOfType<T>(const string& name) const { \
CAFFE2_EXPORT bool ArgumentHelper::HasSingleArgumentOfType<T>(const string& name) const { \
if (arg_map_.count(name) == 0) { \
return false; \
} \
@ -321,7 +321,7 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(NetDef, n, false)
#define INSTANTIATE_GET_REPEATED_ARGUMENT( \
T, fieldname, enforce_lossless_conversion) \
template <> \
vector<T> ArgumentHelper::GetRepeatedArgument<T>( \
CAFFE2_EXPORT vector<T> ArgumentHelper::GetRepeatedArgument<T>( \
const string& name, const std::vector<T>& default_value) const { \
if (arg_map_.count(name) == 0) { \
return default_value; \
@ -360,7 +360,7 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(NetDef, nets, false)
#define CAFFE2_MAKE_SINGULAR_ARGUMENT(T, fieldname) \
template <> \
Argument MakeArgument(const string& name, const T& value) { \
CAFFE2_EXPORT Argument MakeArgument(const string& name, const T& value) { \
Argument arg; \
arg.set_name(name); \
arg.set_##fieldname(value); \
@ -375,12 +375,12 @@ CAFFE2_MAKE_SINGULAR_ARGUMENT(string, s)
#undef CAFFE2_MAKE_SINGULAR_ARGUMENT
template <>
bool ArgumentHelper::RemoveArgument(OperatorDef& def, int index);
CAFFE2_EXPORT bool ArgumentHelper::RemoveArgument(OperatorDef& def, int index);
template <>
bool ArgumentHelper::RemoveArgument(NetDef& def, int index);
template <>
Argument MakeArgument(const string& name, const MessageLite& value) {
CAFFE2_EXPORT Argument MakeArgument(const string& name, const MessageLite& value) {
Argument arg;
arg.set_name(name);
arg.set_s(value.SerializeAsString());
@ -389,7 +389,7 @@ Argument MakeArgument(const string& name, const MessageLite& value) {
#define CAFFE2_MAKE_REPEATED_ARGUMENT(T, fieldname) \
template <> \
Argument MakeArgument(const string& name, const vector<T>& value) { \
CAFFE2_EXPORT Argument MakeArgument(const string& name, const vector<T>& value) {\
Argument arg; \
arg.set_name(name); \
for (const auto& v : value) { \
@ -404,7 +404,7 @@ CAFFE2_MAKE_REPEATED_ARGUMENT(int64_t, ints)
CAFFE2_MAKE_REPEATED_ARGUMENT(string, strings)
#undef CAFFE2_MAKE_REPEATED_ARGUMENT
bool HasOutput(const OperatorDef& op, const std::string& output) {
CAFFE2_EXPORT bool HasOutput(const OperatorDef& op, const std::string& output) {
for (const auto& outp : op.output()) {
if (outp == output) {
return true;
@ -413,7 +413,7 @@ bool HasOutput(const OperatorDef& op, const std::string& output) {
return false;
}
bool HasInput(const OperatorDef& op, const std::string& input) {
CAFFE2_EXPORT bool HasInput(const OperatorDef& op, const std::string& input) {
for (const auto& inp : op.input()) {
if (inp == input) {
return true;
@ -423,7 +423,7 @@ bool HasInput(const OperatorDef& op, const std::string& input) {
}
// Return the argument index or -1 if it does not exist.
int GetArgumentIndex(
CAFFE2_EXPORT int GetArgumentIndex(
const google::protobuf::RepeatedPtrField<Argument>& args,
const string& name) {
int index = 0;
@ -436,7 +436,7 @@ int GetArgumentIndex(
return -1;
}
const Argument& GetArgument(const OperatorDef& def, const string& name) {
CAFFE2_EXPORT const Argument& GetArgument(const OperatorDef& def, const string& name) {
int index = GetArgumentIndex(def.arg(), name);
if (index != -1) {
return def.arg(index);
@ -449,7 +449,7 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) {
}
}
const Argument& GetArgument(const NetDef& def, const string& name) {
CAFFE2_EXPORT const Argument& GetArgument(const NetDef& def, const string& name) {
int index = GetArgumentIndex(def.arg(), name);
if (index != -1) {
return def.arg(index);
@ -462,7 +462,7 @@ const Argument& GetArgument(const NetDef& def, const string& name) {
}
}
bool GetFlagArgument(
CAFFE2_EXPORT bool GetFlagArgument(
const google::protobuf::RepeatedPtrField<Argument>& args,
const string& name,
bool default_value) {
@ -476,21 +476,21 @@ bool GetFlagArgument(
return default_value;
}
bool GetFlagArgument(
CAFFE2_EXPORT bool GetFlagArgument(
const OperatorDef& def,
const string& name,
bool default_value) {
return GetFlagArgument(def.arg(), name, default_value);
}
bool GetFlagArgument(
CAFFE2_EXPORT bool GetFlagArgument(
const NetDef& def,
const string& name,
bool default_value) {
return GetFlagArgument(def.arg(), name, default_value);
}
Argument* GetMutableArgument(
CAFFE2_EXPORT Argument* GetMutableArgument(
const string& name,
const bool create_if_missing,
OperatorDef* def) {
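
The CAFFE2_EXPORT annotations above mark these out-of-line definitions so the symbols stay visible when caffe2 is built as a shared library or Windows DLL. As a rough, illustrative sketch of the kind of visibility macro CAFFE2_EXPORT stands for (the exact definition lives in the caffe2 headers and may differ; the names below are assumptions, not the real implementation):

// Illustrative visibility macros only; not the exact caffe2 definition.
#if defined(_WIN32)
#define CAFFE2_EXPORT __declspec(dllexport)
#define CAFFE2_IMPORT __declspec(dllimport)
#else
#define CAFFE2_EXPORT __attribute__((visibility("default")))
#define CAFFE2_IMPORT CAFFE2_EXPORT
#endif

Without such an annotation, a Windows DLL build hides the symbol by default and code outside the DLL that uses proto_utils can fail to link.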

View File

@ -194,7 +194,7 @@ CAFFE2_API bool HasInput(const OperatorDef& op, const std::string& input);
* does not copy the operator def, so one would need to make sure that the
* lifetime of the OperatorDef object outlives that of the ArgumentHelper.
*/
class CAFFE2_API ArgumentHelper {
class CAFFE2_EXPORT ArgumentHelper {
public:
template <typename Def>
static bool HasArgument(const Def& def, const string& name) {
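
With the whole ArgumentHelper class now exported, code outside the caffe2 DLL can use it directly. A minimal usage sketch, assuming a hand-built OperatorDef (the operator type and argument values here are invented for illustration):

#include "caffe2/proto/caffe2.pb.h"    // OperatorDef, Argument (generated protos)
#include "caffe2/utils/proto_utils.h"  // ArgumentHelper

void Example() {
  caffe2::OperatorDef def;
  def.set_type("MyOp");            // hypothetical operator type
  auto* arg = def.add_arg();
  arg->set_name("axis");
  arg->set_i(1);

  caffe2::ArgumentHelper helper(def);
  if (helper.HasArgument("axis")) {
    // Falls back to the default (0) when the argument is absent; yields 1 here.
    const int axis = helper.GetSingleArgument<int>("axis", 0);
    (void)axis;
  }
}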

View File

@ -1,4 +1,4 @@
# UBSAN triggers when compiling protobuf, so we need to disable it.
# UBSAN triggers when compiling protobuf, so we need to disable it.
set(UBSAN_FLAG "-fsanitize=undefined")
macro(disable_ubsan)
@ -24,14 +24,12 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
endif()
# ---[ Threads
if(BUILD_CAFFE2)
include(${CMAKE_CURRENT_LIST_DIR}/public/threads.cmake)
if (TARGET Threads::Threads)
list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS Threads::Threads)
else()
message(FATAL_ERROR
"Cannot find threading library. Caffe2 requires Threads to compile.")
endif()
include(${CMAKE_CURRENT_LIST_DIR}/public/threads.cmake)
if (TARGET Threads::Threads)
list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS Threads::Threads)
else()
message(FATAL_ERROR
"Cannot find threading library. Caffe2 requires Threads to compile.")
endif()
# ---[ protobuf
@ -42,22 +40,20 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
endif()
# ---[ git: used to generate git build string.
if(BUILD_CAFFE2)
find_package(Git)
if(GIT_FOUND)
execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --always --dirty
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/.."
OUTPUT_VARIABLE CAFFE2_GIT_VERSION
RESULT_VARIABLE __git_result)
if(NOT ${__git_result} EQUAL 0)
set(CAFFE2_GIT_VERSION "unknown")
endif()
else()
message(
WARNING
"Cannot find git, so Caffe2 won't have any git build info available")
find_package(Git)
if(GIT_FOUND)
execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --always --dirty
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/.."
OUTPUT_VARIABLE CAFFE2_GIT_VERSION
RESULT_VARIABLE __git_result)
if(NOT ${__git_result} EQUAL 0)
set(CAFFE2_GIT_VERSION "unknown")
endif()
else()
message(
WARNING
"Cannot find git, so Caffe2 won't have any git build info available")
endif()
# ---[ BLAS
@ -512,7 +508,7 @@ if(USE_CUDA)
endif()
# ---[ HIP
if(BUILD_CAFFE2 OR NOT BUILD_ATEN_MOBILE)
if(NOT BUILD_ATEN_MOBILE)
include(${CMAKE_CURRENT_LIST_DIR}/public/LoadHIP.cmake)
if(PYTORCH_FOUND_HIP)
message(INFO "Compiling with HIP for AMD.")
@ -753,8 +749,11 @@ if (USE_NNAPI AND NOT ANDROID)
caffe2_update_option(USE_NNAPI OFF)
endif()
# TODO(orionr): Enable all of this for Windows DLL when we
# can figure out how to get it to build
if (NOT (MSVC AND BUILD_SHARED_LIBS))
if (NOT BUILD_ATEN_MOBILE)
if (BUILD_CAFFE2)
if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
list(APPEND Caffe2_DEPENDENCY_LIBS aten_op_header_gen)
if (USE_CUDA)
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS aten_op_header_gen)
@ -762,6 +761,7 @@ if (NOT BUILD_ATEN_MOBILE)
include_directories(${PROJECT_BINARY_DIR}/caffe2/contrib/aten)
endif()
endif()
endif()
if (USE_ZSTD)
list(APPEND Caffe2_DEPENDENCY_LIBS libzstd_static)
@ -878,7 +878,6 @@ if (NOT BUILD_ATEN_MOBILE)
############################################
# Flags
# When using MSVC
# Detect CUDA architecture and get best NVCC flags
# finding cuda must be first because other things depend on the result
#
@ -935,12 +934,12 @@ if (NOT BUILD_ATEN_MOBILE)
OPTION(NDEBUG "disable asserts (WARNING: this may result in silent UB e.g. with out-of-bound indices)")
IF (NOT NDEBUG)
MESSAGE(STATUS "Removing -DNDEBUG from compile flags")
STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_C_FLAGS "" ${CMAKE_C_FLAGS})
STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_C_FLAGS_DEBUG "" ${CMAKE_C_FLAGS_DEBUG})
STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_C_FLAGS_RELEASE "" ${CMAKE_C_FLAGS_RELEASE})
STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_CXX_FLAGS "" ${CMAKE_CXX_FLAGS})
STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_CXX_FLAGS_DEBUG "" ${CMAKE_CXX_FLAGS_DEBUG})
STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE "" ${CMAKE_CXX_FLAGS_RELEASE})
STRING(REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS "" ${CMAKE_C_FLAGS})
STRING(REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS_DEBUG "" ${CMAKE_C_FLAGS_DEBUG})
STRING(REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS_RELEASE "" ${CMAKE_C_FLAGS_RELEASE})
STRING(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS "" ${CMAKE_CXX_FLAGS})
STRING(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_DEBUG "" ${CMAKE_CXX_FLAGS_DEBUG})
STRING(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE "" ${CMAKE_CXX_FLAGS_RELEASE})
ENDIF()
# OpenMP support?
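
The flag scrubbing above keeps assertions enabled unless NDEBUG is explicitly requested: the standard assert macro expands to nothing whenever NDEBUG is defined, which is the "silent UB" risk the option warning mentions. A minimal illustration:

#include <cassert>

int main() {
  int index = -1;
  // Aborts here in a build whose flags do not define NDEBUG;
  // expands to nothing (and the bad index goes unchecked) when -DNDEBUG is set.
  assert(index >= 0);
  return 0;
}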

View File

@ -18,7 +18,6 @@ function (caffe2_print_configuration_summary)
message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}")
message(STATUS "")
message(STATUS " BUILD_CAFFE2 : ${BUILD_CAFFE2}")
message(STATUS " BUILD_ATEN_MOBILE : ${BUILD_ATEN_MOBILE}")
message(STATUS " BUILD_BINARY : ${BUILD_BINARY}")
message(STATUS " BUILD_CUSTOM_PROTOBUF : ${BUILD_CUSTOM_PROTOBUF}")
@ -29,9 +28,7 @@ function (caffe2_print_configuration_summary)
message(STATUS " Protobuf includes : ${PROTOBUF_INCLUDE_DIRS}")
message(STATUS " Protobuf libraries : ${PROTOBUF_LIBRARIES}")
endif()
if (${BUILD_CAFFE2})
message(STATUS " BUILD_DOCS : ${BUILD_DOCS}")
endif()
message(STATUS " BUILD_DOCS : ${BUILD_DOCS}")
message(STATUS " BUILD_PYTHON : ${BUILD_PYTHON}")
if (${BUILD_PYTHON})
message(STATUS " Python version : ${PYTHON_VERSION_STRING}")

View File

@ -47,35 +47,14 @@ else
export NO_CUDA=1
fi
###########################################################
# Build Caffe2
###########################################################
cmake_args=()
cmake_args+=("-DCMAKE_INSTALL_PREFIX=$PREFIX")
# Build Caffe2
mkdir -p caffe2_build && pushd caffe2_build
cmake "${cmake_args[@]}" $CAFFE2_CMAKE_ARGS ..
if [ "$(uname)" == 'Darwin' ]; then
make "-j$(sysctl -n hw.ncpu)"
else
make "-j$(nproc)"
fi
make install/fast
popd
###########################################################
# Build Pytorch
# Build Caffe2 and PyTorch
###########################################################
if [[ "$OSTYPE" == "darwin"* ]]; then
MACOSX_DEPLOYMENT_TARGET=10.9 python setup.py install
FULL_CAFFE2=1 MACOSX_DEPLOYMENT_TARGET=10.9 python setup.py install
exit 0
fi
python setup.py install
FULL_CAFFE2=1 python setup.py install

View File

@ -1,5 +1,9 @@
# ---[ Add modules
add_subdirectory(detectron)
# TODO(orionr): Enable Detectron ops for Windows DLL when we
# can figure out how to get it to build
if (NOT (MSVC AND BUILD_SHARED_LIBS))
add_subdirectory(detectron)
endif()
add_subdirectory(module_test)
add_subdirectory(observers)
add_subdirectory(rocksdb)

View File

@ -16,7 +16,7 @@
# [<flags forwarded to cmake>]...
#
# Parameters can also be passed through the BUILD_ENVIRONMENT environment
# variable, e.g.
# variable, e.g.
# BUILD_ENVIRONMENT=conda2-cuda8.0-cudnn7-gcc4.8 ./scripts/build_anaconda.sh
# - Parameters parsed from the BUILD_ENVIRONMENT will be overridden by command
# line parameters.
@ -312,7 +312,7 @@ if [[ -n $integrated ]]; then
if [[ -n $cuda_ver ]]; then
# Removed until https://github.com/conda/conda/issues/7245 is resolved
#append_to_section 'features' features:
#append_to_section 'features' " - $cuda_feature_name"
#append_to_section 'features' " - $cuda_feature_name"
append_to_section 'build' "- magma-$cuda_feature_name"
#append_to_section 'features' ' - nccl2'
#add_package $cuda_feature_name

View File

@ -169,7 +169,6 @@ goto:eof
cd build
cmake .. %CMAKE_GENERATOR_COMMAND% ^
-DCMAKE_BUILD_TYPE=%BUILD_TYPE% ^
-DBUILD_CAFFE2=OFF ^
-DBUILD_TORCH="%BUILD_TORCH%" ^
-DNVTOOLEXT_HOME="%NVTOOLEXT_HOME%" ^
-DNO_API=ON ^
@ -178,7 +177,10 @@ goto:eof
-DBUILD_BINARY=OFF ^
-DONNX_NAMESPACE=%ONNX_NAMESPACE% ^
-DUSE_CUDA=%USE_CUDA% ^
-DUSE_CUDNN=OFF ^
-DUSE_NNPACK=%USE_NNPACK% ^
-DUSE_GLOG=OFF ^
-DUSE_GFLAGS=OFF ^
-DCUDNN_INCLUDE_DIR="%CUDNN_INCLUDE_DIR%" ^
-DCUDNN_LIB_DIR="%CUDNN_LIB_DIR%" ^
-DCUDNN_LIBRARY="%CUDNN_LIBRARY%" ^

View File

@ -254,17 +254,19 @@ function build_caffe2() {
-DPYTHON_EXECUTABLE=$PYTORCH_PYTHON \
-DBUILDING_WITH_TORCH_LIBS=ON \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DBUILD_CAFFE2=$FULL_CAFFE2 \
-DBUILD_TORCH=$BUILD_TORCH \
-DBUILD_PYTHON=$FULL_CAFFE2 \
-DBUILD_BINARY=OFF \
-DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DBUILD_BINARY=$FULL_CAFFE2 \
-DBUILD_TEST=$FULL_CAFFE2 \
-DINSTALL_TEST=$FULL_CAFFE2 \
-DONNX_NAMESPACE=$ONNX_NAMESPACE \
-DUSE_CUDA=$USE_CUDA \
-DCAFFE2_STATIC_LINK_CUDA=$CAFFE2_STATIC_LINK_CUDA \
-DUSE_ROCM=$USE_ROCM \
-DUSE_NNPACK=$USE_NNPACK \
-DCUDA_DEVICE_DEBUG=$CUDA_DEVICE_DEBUG \
-DUSE_GLOG=OFF \
-DUSE_GFLAGS=OFF \
-DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \
-DCUDNN_LIB_DIR=$CUDNN_LIB_DIR \
-DCUDNN_LIBRARY=$CUDNN_LIBRARY \