mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
bugfix for Windows, esp. VS 2017
Summary: aaronmarkham this solves your Windows build issue. Basically: (1) VS 2017 does not have CUDA support yet, and we will be waiting on NVidia to do so. (2) VS 2015 and 2017 need different cmake generator strings. This PR shows how to determine those and also updates appveyor to do contbuild guard for the following 3 settings: - VS2015 without cuda - VS2017 without cuda - VS2015 with cuda Closes https://github.com/caffe2/caffe2/pull/210 Differential Revision: D4745007 Pulled By: Yangqing fbshipit-source-id: 50952552843abd0eb6f4145d9f132daeee3a6794
This commit is contained in:
committed by
Facebook Github Bot
parent
93ff338ca7
commit
aa4d07d3c4
17
appveyor.yml
17
appveyor.yml
@ -4,12 +4,27 @@ environment:
|
||||
matrix:
|
||||
- USE_CUDA: OFF
|
||||
CMAKE_BUILD_TYPE: Release
|
||||
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
|
||||
|
||||
# Building CUDA with Visual Studio 2017 is yet to be supported by
|
||||
# NVidia, so we cannot enable it right now.
|
||||
#- USE_CUDA: ON
|
||||
# CMAKE_BUILD_TYPE: Release
|
||||
# APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
|
||||
|
||||
- USE_CUDA: ON
|
||||
CMAKE_BUILD_TYPE: Release
|
||||
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
|
||||
- USE_CUDA: OFF
|
||||
CMAKE_BUILD_TYPE: Debug
|
||||
CMAKE_BUILD_TYPE: Release
|
||||
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
|
||||
# Debug build is not a top priority for us right now, so in the
|
||||
# interest of contbuild time, we disable it.
|
||||
#- USE_CUDA: OFF
|
||||
# CMAKE_BUILD_TYPE: Debug
|
||||
# APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
|
||||
|
||||
# Currently, CUDA + Debug does not work due to an error of using
|
||||
# std::_Debug_lt in device code. Not sure where this comes from yet,
|
||||
|
@ -158,7 +158,12 @@ struct EnforceOK {};
|
||||
|
||||
class EnforceFailMessage {
|
||||
public:
|
||||
#ifdef _MSC_VER
|
||||
// MSVC + NVCC ignores constexpr and will issue a warning if included.
|
||||
/* implicit */ EnforceFailMessage(EnforceOK) : msg_(nullptr) {}
|
||||
#else
|
||||
constexpr /* implicit */ EnforceFailMessage(EnforceOK) : msg_(nullptr) {}
|
||||
#endif
|
||||
EnforceFailMessage(EnforceFailMessage&&) = default;
|
||||
EnforceFailMessage(const EnforceFailMessage&) = delete;
|
||||
EnforceFailMessage& operator=(EnforceFailMessage&&) = delete;
|
||||
|
@ -17,7 +17,7 @@ class LabelCrossEntropyOp final : public Operator<Context> {
|
||||
|
||||
protected:
|
||||
static constexpr T kLOG_THRESHOLD() {
|
||||
return 1e-20;
|
||||
return static_cast<T>(1e-20);
|
||||
}
|
||||
// Input: X, label
|
||||
// Output: Y
|
||||
@ -34,7 +34,7 @@ class LabelCrossEntropyGradientOp final : public Operator<Context> {
|
||||
// Input: X, label, dY
|
||||
// Output: dX. There is no gradient with respect to the label.
|
||||
static constexpr T kLOG_THRESHOLD() {
|
||||
return 1e-20;
|
||||
return static_cast<T>(1e-20);
|
||||
}
|
||||
};
|
||||
|
||||
@ -91,7 +91,7 @@ class CrossEntropyOp final : public Operator<Context> {
|
||||
// Input: X, label
|
||||
// Output: Y
|
||||
static constexpr T kLOG_THRESHOLD() {
|
||||
return 1e-20;
|
||||
return static_cast<T>(1e-20);
|
||||
}
|
||||
};
|
||||
|
||||
@ -106,7 +106,7 @@ class CrossEntropyGradientOp final : public Operator<Context> {
|
||||
// Input: X, label, dY
|
||||
// Output: dX. There is no gradient with respect to the label.
|
||||
static constexpr T kLOG_THRESHOLD() {
|
||||
return 1e-20;
|
||||
return static_cast<T>(1e-20);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -75,7 +75,7 @@ bool CosineSimilarityOp<float, CPUContext>::RunOnDevice() {
|
||||
const float* X_data = X.data<float>();
|
||||
const float* Y_data = Y.data<float>();
|
||||
float X2, Y2;
|
||||
const float kEps = 1e-12;
|
||||
const float kEps = 1e-12f;
|
||||
for (int i = 0; i < N; ++i) { // TODO: multithreading
|
||||
auto offset = i * D;
|
||||
math::Dot<float, CPUContext>(
|
||||
|
@ -198,7 +198,7 @@ class CosineSimilarityGradientOp final : public Operator<Context> {
|
||||
auto* dX_data = dX->template mutable_data<T>();
|
||||
auto* dY_data = dY->template mutable_data<T>();
|
||||
T XN, YN, XY;
|
||||
const T kEps = 1e-12;
|
||||
const T kEps = 1e-12f;
|
||||
for (int i = 0; i < N; ++i) { // TODO: multithreading
|
||||
auto offset = i * D;
|
||||
|
||||
|
@ -32,11 +32,11 @@ class PrefetchOperator : public OperatorBase {
|
||||
prefetch_success_(true),
|
||||
finalize_(false) {}
|
||||
|
||||
virtual ~PrefetchOperator() {
|
||||
CAFFE_ENFORCE(
|
||||
finalize_ || !prefetch_thread_.get(),
|
||||
"Your derived class should call Finalize() in its destructor "
|
||||
"so the prefetching thread is joined. ");
|
||||
virtual ~PrefetchOperator() noexcept {
|
||||
CHECK(finalize_ || !prefetch_thread_.get()) <<
|
||||
"YOU MADE A PROGRAMING ERROR: derived class of PrefetchOperator "
|
||||
"should call Finalize() in its destructor so the prefetching "
|
||||
"thread is joined. ";
|
||||
}
|
||||
|
||||
void Finalize() {
|
||||
|
@ -203,7 +203,7 @@ class TensorFeeder : public BlobFeederBase {
|
||||
};
|
||||
|
||||
namespace python_detail {
|
||||
class Func;
|
||||
struct Func;
|
||||
}
|
||||
|
||||
class PythonOpBase : public Operator<CPUContext> {
|
||||
|
@ -47,7 +47,7 @@ class AdagradOp final : public Operator<Context> {
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
AdagradOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws),
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5)) {}
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
|
||||
bool RunOnDevice() override {
|
||||
CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENT_1).size());
|
||||
CAFFE_ENFORCE(Input(GRAD).size() == Input(PARAM).size());
|
||||
@ -78,7 +78,7 @@ class SparseAdagradOp final : public Operator<Context> {
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
SparseAdagradOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws),
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5)) {}
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
|
||||
|
@ -58,9 +58,9 @@ class AdamOp final : public Operator<Context> {
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
AdamOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws),
|
||||
beta1_(OperatorBase::GetSingleArgument<float>("beta1", 0.9)),
|
||||
beta2_(OperatorBase::GetSingleArgument<float>("beta2", 0.999)),
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5)) {}
|
||||
beta1_(OperatorBase::GetSingleArgument<float>("beta1", 0.9f)),
|
||||
beta2_(OperatorBase::GetSingleArgument<float>("beta2", 0.999f)),
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
|
||||
bool RunOnDevice() override {
|
||||
// Iter live on the CPU
|
||||
CAFFE_ENFORCE(OperatorBase::InputIsType<TensorCPU>(ITER));
|
||||
@ -110,9 +110,9 @@ class SparseAdamOp final : public Operator<Context> {
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
SparseAdamOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws),
|
||||
beta1_(OperatorBase::GetSingleArgument<float>("beta1", 0.9)),
|
||||
beta2_(OperatorBase::GetSingleArgument<float>("beta2", 0.999)),
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5)) {}
|
||||
beta1_(OperatorBase::GetSingleArgument<float>("beta1", 0.9f)),
|
||||
beta2_(OperatorBase::GetSingleArgument<float>("beta2", 0.999f)),
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
|
||||
|
@ -7,10 +7,10 @@ namespace caffe2 {
|
||||
template <typename T>
|
||||
struct FtrlParams {
|
||||
explicit FtrlParams(OperatorBase* op)
|
||||
: alphaInv(1.0 / op->GetSingleArgument<float>("alpha", 0.005)),
|
||||
beta(op->GetSingleArgument<float>("beta", 1.0)),
|
||||
lambda1(op->GetSingleArgument<float>("lambda1", 0.001)),
|
||||
lambda2(op->GetSingleArgument<float>("lambda2", 0.001)) {}
|
||||
: alphaInv(1.0 / op->GetSingleArgument<float>("alpha", 0.005f)),
|
||||
beta(op->GetSingleArgument<float>("beta", 1.0f)),
|
||||
lambda1(op->GetSingleArgument<float>("lambda1", 0.001f)),
|
||||
lambda2(op->GetSingleArgument<float>("lambda2", 0.001f)) {}
|
||||
T alphaInv;
|
||||
T beta;
|
||||
T lambda1;
|
||||
|
@ -26,9 +26,9 @@ class RmsPropOp final : public Operator<Context> {
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
RmsPropOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws),
|
||||
decay_(OperatorBase::GetSingleArgument<float>("decay", 0.9)),
|
||||
momentum_(OperatorBase::GetSingleArgument<float>("momentum", 0.0)),
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5)) {}
|
||||
decay_(OperatorBase::GetSingleArgument<float>("decay", 0.9f)),
|
||||
momentum_(OperatorBase::GetSingleArgument<float>("momentum", 0.0f)),
|
||||
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
|
||||
bool RunOnDevice() override {
|
||||
CAFFE_ENFORCE(Input(LR).size() == 1);
|
||||
CAFFE_ENFORCE(Input(GRAD).size() == Input(MEAN_SQUARES).size());
|
||||
|
@ -53,13 +53,19 @@ if(NOT CAFFE2_NEED_TO_TURN_OFF_DEPRECATION_WARNING AND NOT MSVC)
|
||||
endif()
|
||||
|
||||
# ---[ If we are using msvc, set no warning flags
|
||||
# Note(jiayq): if you are going to add a warning flag, check if this is
|
||||
# totally necessary, and only add when you see fit. If it is needed due to
|
||||
# a third party library (like Protobuf), mention it in the comment as
|
||||
# "THIRD_PARTY_NAME related"
|
||||
if (${CMAKE_CXX_COMPILER_ID} STREQUAL "MSVC")
|
||||
add_compile_options(
|
||||
/wd4018 # (3): Signed/unsigned mismatch
|
||||
/wd4065 # (3): switch with default but no case. Protobuf related.
|
||||
/wd4244 # (2/3/4): Possible loss of precision
|
||||
/wd4267 # (3): Conversion of size_t to smaller type. Possible loss of data.
|
||||
/wd4503 # (1): decorated name length exceeded, name was truncated. Eigen related.
|
||||
/wd4506 # (1): no definition for inline function. Protobuf related.
|
||||
/wd4554 # (3): check operator precedence for possible error. Eigen related.
|
||||
/wd4800 # (3): Forcing non-boolean value to true or false.
|
||||
/wd4996 # (3): Use of a deprecated member
|
||||
)
|
||||
|
@ -1,28 +1,8 @@
|
||||
:: Installation scripts for appveyor.
|
||||
|
||||
@echo Downloading CUDA toolkit 8 ...
|
||||
@echo on
|
||||
|
||||
appveyor DownloadFile ^
|
||||
https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe ^
|
||||
-FileName cuda_8.0.44_windows.exe
|
||||
appveyor Downloadfile ^
|
||||
http://developer.download.nvidia.com/compute/redist/cudnn/v5.1/cudnn-8.0-windows10-x64-v5.1.zip ^
|
||||
-FileName cudnn-8.0-windows10-x64-v5.1.zip
|
||||
|
||||
@echo Installing CUDA toolkit 8 ...
|
||||
cuda_8.0.44_windows.exe -s compiler_8.0 cublas_8.0 cublas_dev_8.0 cudart_8.0 curand_8.0 curand_dev_8.0 nvrtc_8.0 nvrtc_dev_8.0
|
||||
set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH%
|
||||
:: TODO: we will still need to figure out how to install cudnn.
|
||||
7z x cudnn-8.0-windows10-x64-v5.1.zip
|
||||
copy cuda\include\cudnn.h ^
|
||||
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include\"
|
||||
copy cuda\lib\x64\cudnn.lib ^
|
||||
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\x64\"
|
||||
copy cuda\bin\cudnn64_5.dll ^
|
||||
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\"
|
||||
|
||||
:: Make sure that nvcc is working correctly.
|
||||
nvcc -V || exit /b
|
||||
if "%USE_CUDA%" == "ON" call %~dp0%install_cuda.bat
|
||||
|
||||
:: Miniconda path for appveyor
|
||||
set PATH=C:\Miniconda-x64;C:\Miniconda-x64\Scripts;%PATH%
|
||||
|
22
scripts/appveyor/install_cuda.bat
Normal file
22
scripts/appveyor/install_cuda.bat
Normal file
@ -0,0 +1,22 @@
|
||||
@echo on
|
||||
|
||||
appveyor DownloadFile ^
|
||||
https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe ^
|
||||
-FileName cuda_8.0.44_windows.exe
|
||||
appveyor Downloadfile ^
|
||||
http://developer.download.nvidia.com/compute/redist/cudnn/v5.1/cudnn-8.0-windows10-x64-v5.1.zip ^
|
||||
-FileName cudnn-8.0-windows10-x64-v5.1.zip
|
||||
|
||||
cuda_8.0.44_windows.exe -s compiler_8.0 cublas_8.0 cublas_dev_8.0 cudart_8.0 curand_8.0 curand_dev_8.0 nvrtc_8.0 nvrtc_dev_8.0
|
||||
set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH%
|
||||
|
||||
7z x cudnn-8.0-windows10-x64-v5.1.zip
|
||||
copy cuda\include\cudnn.h ^
|
||||
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include\"
|
||||
copy cuda\lib\x64\cudnn.lib ^
|
||||
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\x64\"
|
||||
copy cuda\bin\cudnn64_5.dll ^
|
||||
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\"
|
||||
|
||||
:: Make sure that nvcc is working correctly.
|
||||
nvcc -V || exit /b
|
@ -8,7 +8,7 @@
|
||||
:: After the execution of the file, one should be able to find the host protoc
|
||||
:: binary at build_host_protoc/bin/protoc.exe.
|
||||
|
||||
@echo off
|
||||
@echo on
|
||||
|
||||
SET ORIGINAL_DIR=%cd%
|
||||
SET CAFFE2_ROOT=%~dp0%..
|
||||
@ -17,7 +17,31 @@ echo "Created %CAFFE2_ROOT%\build_host_protoc"
|
||||
|
||||
cd %CAFFE2_ROOT%\build_host_protoc
|
||||
|
||||
cmake ..\third_party\protobuf\cmake -DCMAKE_INSTALL_PREFIX=. -Dprotobuf_BUILD_TESTS=OFF
|
||||
if NOT DEFINED CMAKE_GENERATOR (
|
||||
if DEFINED APPVEYOR_BUILD_WORKER_IMAGE (
|
||||
if "%APPVEYOR_BUILD_WORKER_IMAGE%" == "Visual Studio 2017" (
|
||||
set CMAKE_GENERATOR="Visual Studio 15 2017 Win64"
|
||||
) else if "%APPVEYOR_BUILD_WORKER_IMAGE%" == "Visual Studio 2015" (
|
||||
set CMAKE_GENERATOR="Visual Studio 14 2015 Win64"
|
||||
) else (
|
||||
echo "You made a programming error: unknown APPVEYOR_BUILD_WORKER_IMAGE:"
|
||||
echo %APPVEYOR_BUILD_WORKER_IMAGE%
|
||||
exit /b
|
||||
)
|
||||
) else (
|
||||
:: By default we use win64 VS 2017.
|
||||
set CMAKE_GENERATOR="Visual Studio 15 2017 Win64"
|
||||
)
|
||||
)
|
||||
|
||||
cmake ..\third_party\protobuf\cmake ^
|
||||
-G%CMAKE_GENERATOR% ^
|
||||
-DCMAKE_INSTALL_PREFIX=. ^
|
||||
-Dprotobuf_BUILD_TESTS=OFF ^
|
||||
-DCMAKE_BUILD_TYPE=Debug ^
|
||||
|| exit /b
|
||||
|
||||
:: Actually run the build
|
||||
msbuild INSTALL.vcxproj
|
||||
|
||||
cd %ORIGINAL_DIR%
|
@ -14,17 +14,34 @@ if not exist %CAFFE2_ROOT%\build mkdir %CAFFE2_ROOT%\build
|
||||
cd %CAFFE2_ROOT%\build
|
||||
|
||||
if NOT DEFINED USE_CUDA (
|
||||
set USE_CUDA=ON
|
||||
set USE_CUDA=OFF
|
||||
)
|
||||
|
||||
if NOT DEFINED CMAKE_BUILD_TYPE (
|
||||
set CMAKE_BUILD_TYPE=Release
|
||||
)
|
||||
|
||||
if NOT DEFINED CMAKE_GENERATOR (
|
||||
if DEFINED APPVEYOR_BUILD_WORKER_IMAGE (
|
||||
if "%APPVEYOR_BUILD_WORKER_IMAGE%" == "Visual Studio 2017" (
|
||||
set CMAKE_GENERATOR="Visual Studio 14 2015 Win64"
|
||||
) else if "%APPVEYOR_BUILD_WORKER_IMAGE%" == "Visual Studio 2015" (
|
||||
set CMAKE_GENERATOR="Visual Studio 14 2015 Win64"
|
||||
) else (
|
||||
echo "You made a programming error: unknown APPVEYOR_BUILD_WORKER_IMAGE:"
|
||||
echo %APPVEYOR_BUILD_WORKER_IMAGE%
|
||||
exit /b
|
||||
)
|
||||
) else (
|
||||
:: By default we use win64 VS 2017.
|
||||
set CMAKE_GENERATOR="Visual Studio 15 2017 Win64"
|
||||
)
|
||||
)
|
||||
|
||||
:: Set up cmake. We will skip building the test files right now.
|
||||
:: TODO: enable cuda support.
|
||||
cmake .. ^
|
||||
-G"Visual Studio 14 2015 Win64" ^
|
||||
-G%CMAKE_GENERATOR% ^
|
||||
-DCMAKE_VERBOSE_MAKEFILE=1 ^
|
||||
-DBUILD_TEST=OFF ^
|
||||
-DBUILD_SHARED_LIBS=OFF ^
|
||||
|
Reference in New Issue
Block a user