NVIDIA
diff --git a/‎.travis.yml
+4-8 b/‎.travis.yml
+4-8
diff --git a/‎3rdparty/half_float/half.hpp
+1-1 b/‎3rdparty/half_float/half.hpp
+1-1
diff --git a/‎CMakeLists.txt
+8-9 b/‎CMakeLists.txt
+8-9
diff --git a/‎Makefile
+33-35 b/‎Makefile
+33-35
diff --git a/‎Makefile.config.example
-3 b/‎Makefile.config.example
-3
diff --git a/‎NVCaffe-User-Guide.pdf
1.59 MB b/‎NVCaffe-User-Guide.pdf
1.59 MB
diff --git a/‎README.md
+10-2 b/‎README.md
+10-2
diff --git a/‎cmake/Cuda.cmake
-4 b/‎cmake/Cuda.cmake
-4
diff --git a/‎cmake/Dependencies.cmake
+20-16 b/‎cmake/Dependencies.cmake
+20-16
diff --git a/‎cmake/Summary.cmake
+7-7 b/‎cmake/Summary.cmake
+7-7
diff --git a/‎docs/installation.md
+1-1 b/‎docs/installation.md
+1-1
diff --git a/‎examples/cpp_classification/classification.cpp
-5 b/‎examples/cpp_classification/classification.cpp
-5
@@ -12,20 +12,16 @@ env:
     # envvar defaults:
     #   WITH_CMAKE: false
     #   WITH_PYTHON3: false
-    #   WITH_IO: true
-    #   WITH_CUDA: false
+    #   WITH_CUDA: always true since v0.17
     #   WITH_CUDNN: false
     - BUILD_NAME="default-make"
 #   - BUILD_NAME="python3-make" WITH_PYTHON3=true
-    - BUILD_NAME="no-io-make" WITH_IO=false
-    - BUILD_NAME="cuda-make" WITH_CUDA=true
-    - BUILD_NAME="cudnn-make" WITH_CUDA=true WITH_CUDNN=true
+    - BUILD_NAME="cudnn-make" WITH_CUDNN=true
 
     - BUILD_NAME="default-cmake" WITH_CMAKE=true
     - BUILD_NAME="python3-cmake" WITH_CMAKE=true WITH_PYTHON3=true
-    - BUILD_NAME="no-io-cmake" WITH_CMAKE=true WITH_IO=false
-    - BUILD_NAME="cuda-cmake" WITH_CMAKE=true WITH_CUDA=true
-    - BUILD_NAME="cudnn-cmake" WITH_CMAKE=true WITH_CUDA=true WITH_CUDNN=true
+    - BUILD_NAME="cudnn-cmake" WITH_CMAKE=true WITH_CUDNN=true
+    - BUILD_NAME="cudnn-python3-cmake" WITH_CMAKE=true WITH_CUDNN=true WITH_PYTHON3=true
 
 cache:
   apt: true
 
@@ -201,7 +201,7 @@
   #include "caffe/util/gpu_math_functions.cuh"
 #endif
 
-#if !defined(CPU_ONLY) && defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__)
   #define CAFFE_UTIL_HD __host__ __device__
   #define CAFFE_UTIL_IHD __inline__ __host__ __device__
 #else
 
@@ -29,21 +29,20 @@ include(cmake/Summary.cmake)
 include(cmake/ConfigGen.cmake)
 
 # ---[ Options
-caffe_option(CPU_ONLY  "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA
-caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY)
+caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON)
 
 # USE_NCCL: Build Caffe with NCCL Library support
 # Regular ON/OFF option doesn't work here because we need to recognize 3 states:
 # 1. User didn't set USE_NCCL option =>
-#   1.1 If CPU_ONLY is ON we do nothing.
-#   1.2 If CPU_ONLY is OFF we *quietly* try to find it and use if found; do nothing otherwise.
+#   1.1 We *quietly* try to find it and use if found; do nothing otherwise.
 # 2. User explicitly set USE_NCCL=ON option =>
-#   1.1 If CPU_ONLY is ON we do nothing (it's higher priority).
-#   2.1 If CPU_ONLY is OFF we try to find it with *required* option, thus CMake fails if not found. 
+#   2.1 We try to find it with *required* option, thus CMake fails if not found.
 # 3. User explicitly set USE_NCCL=OFF option => we do nothing.
-SET(USE_NCCL)
-if(DEFINED USE_NCCL)
-  STRING(TOUPPER "${USE_NCCL}" USE_NCCL)
+set(USE_NCCL "NONE" CACHE STRING "Link Caffe with NCCL Library for Multi-GPU support")
+if(USE_NCCL STREQUAL "NONE")
+  set(USE_NCCL_SET OFF)
+else()
+  set(USE_NCCL_SET ON)
 endif()
 
 caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON)
 
@@ -28,7 +28,7 @@ THIRDPARTY_DIR := ./3rdparty
 
 # All of the directories containing code.
 SRC_DIRS := $(shell find * -type d -exec bash -c "find {} -maxdepth 1 \
-	\( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print)
+	\( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print 2>/dev/null)
 
 # The target shared library name
 LIBRARY_NAME := $(PROJECT)$(LIBRARY_NAME_SUFFIX)
@@ -179,19 +179,17 @@ CUDA_LIB_DIR :=
 # add <cuda>/lib64 only if it exists
 ifneq ("$(wildcard $(CUDA_DIR)/lib64)","")
 	CUDA_LIB_DIR += $(CUDA_DIR)/lib64
-	CUDA_LIB_DIR += /usr/lib/nvidia-384 /usr/lib/nvidia-381 /usr/lib/nvidia-375 /usr/lib/nvidia-367 /usr/lib/nvidia-361 /usr/lib/nvidia-352
+	CUDA_LIB_DIR += /usr/lib/nvidia-396 /usr/lib/nvidia-390 /usr/lib/nvidia-387 /usr/lib/nvidia-384 /usr/lib/nvidia-381 /usr/lib/nvidia-375 /usr/lib/nvidia-367 /usr/lib/nvidia-361 /usr/lib/nvidia-352
 endif
 CUDA_LIB_DIR += $(CUDA_DIR)/lib
 
-INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include $(THIRDPARTY_DIR)
-ifneq ($(CPU_ONLY), 1)
-	INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
-	LIBRARY_DIRS += $(CUDA_LIB_DIR)
-	LIBRARIES := cudart cublas curand
+INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include $(THIRDPARTY_DIR) /usr/include/hdf5/serial
+INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
+LIBRARY_DIRS += $(CUDA_LIB_DIR)
+LIBRARIES := cudart cublas curand
 ifneq ($(NO_NVML), 1)
 	LIBRARIES += nvidia-ml
 endif
-endif
 
 # Note: libturbojpeg has a packaging bug. Workaround:
 # $ sudo ln -s /usr/lib/x86_64-linux-gnu/libturbojpeg.so.0 /usr/lib/x86_64-linux-gnu/libturbojpeg.so
@@ -222,7 +220,16 @@ ifeq ($(USE_OPENCV), 1)
 	endif
 
 endif
-PYTHON_LIBRARIES ?= boost_python python2.7 boost_regex
+
+python_version_full := $(wordlist 2,4,$(subst ., ,$(shell python --version 2>&1)))
+python_version_major := $(word 1,${python_version_full})
+python_version_minor := $(word 2,${python_version_full})
+python_version_patch := $(word 3,${python_version_full})
+ifeq ($(python_version_major), 3)
+	python_lib_suffix := m
+endif
+
+PYTHON_LIBRARIES ?= boost_python-py${python_version_major}${python_version_minor} python${python_version_major}.${python_version_minor}${python_lib_suffix} boost_regex
 WARNINGS := -Wall -Wno-sign-compare
 
 ##############################
@@ -294,23 +301,21 @@ endif
 # libstdc++ for NVCC compatibility on OS X >= 10.9 with CUDA < 7.0
 ifeq ($(OSX), 1)
 	CXX := /usr/bin/clang++
-	ifneq ($(CPU_ONLY), 1)
-		CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | grep -o '[0-9.]*')
-		ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1)
-			CXXFLAGS += -stdlib=libstdc++
-			LINKFLAGS += -stdlib=libstdc++
-		endif
-		# clang throws this warning for cuda headers
-		WARNINGS += -Wno-unneeded-internal-declaration
-		# 10.11 strips DYLD_* env vars so link CUDA (rpath is available on 10.5+)
-		OSX_10_OR_LATER   := $(shell [ $(OSX_MAJOR_VERSION) -ge 10 ] && echo true)
-		OSX_10_5_OR_LATER := $(shell [ $(OSX_MINOR_VERSION) -ge 5 ] && echo true)
-		ifeq ($(OSX_10_OR_LATER),true)
-			ifeq ($(OSX_10_5_OR_LATER),true)
-				LDFLAGS += -Wl,-rpath,$(CUDA_LIB_DIR)
-			endif
-		endif
-	endif
+    CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | grep -o '[0-9.]*')
+    ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1)
+        CXXFLAGS += -stdlib=libstdc++
+        LINKFLAGS += -stdlib=libstdc++
+    endif
+    # clang throws this warning for cuda headers
+    WARNINGS += -Wno-unneeded-internal-declaration
+    # 10.11 strips DYLD_* env vars so link CUDA (rpath is available on 10.5+)
+    OSX_10_OR_LATER   := $(shell [ $(OSX_MAJOR_VERSION) -ge 10 ] && echo true)
+    OSX_10_5_OR_LATER := $(shell [ $(OSX_MINOR_VERSION) -ge 5 ] && echo true)
+    ifeq ($(OSX_10_OR_LATER),true)
+        ifeq ($(OSX_10_5_OR_LATER),true)
+            LDFLAGS += -Wl,-rpath,$(CUDA_LIB_DIR)
+        endif
+    endif
 	# gtest needs to use its own tuple to not conflict with clang
 	COMMON_FLAGS += -DGTEST_USE_OWN_TR1_TUPLE=1
 	# boost::thread is called boost_thread-mt to mark multithreading on OS X
@@ -376,15 +381,8 @@ ifeq ($(ALLOW_LMDB_NOLOCK), 1)
 endif
 endif
 
-# CPU-only configuration
-ifeq ($(CPU_ONLY), 1)
-	OBJS := $(PROTO_OBJS) $(CXX_OBJS)
-	TEST_OBJS := $(TEST_CXX_OBJS)
-	TEST_BINS := $(TEST_CXX_BINS)
-	ALL_WARNS := $(ALL_CXX_WARNS)
-	TEST_FILTER := --gtest_filter="-*GPU*"
-	COMMON_FLAGS += -DCPU_ONLY
-endif
+# Newer install location of the serial HDF5 libraries
+LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial
 
 ifeq ($(NO_NVML), 1)
 	COMMON_FLAGS += -DNO_NVML=1
 
@@ -9,9 +9,6 @@
 # See https://github.com/NVIDIA/nccl
 # USE_NCCL := 1
 
-# CPU-only switch (uncomment to build without GPU support).
-# Disables FP16 support.
-# CPU_ONLY := 1
 # Builds tests with 16 bit float support in addition to 32 and 64 bit.
 # TEST_FP16 := 1
 
 
@@ -13,9 +13,9 @@ Here are the major features:
 * **Mixed-precision support**. It allows to store and/or compute data in either 
 64, 32 or 16 bit formats. Precision can be defined for every layer (forward and 
 backward passes might be different too), or it can be set for the whole Net.
-* **Integration with  [cuDNN](https://developer.nvidia.com/cudnn) v6**.
+* **Integration with  [cuDNN](https://developer.nvidia.com/cudnn) v7**.
 * **Automatic selection of the best cuDNN convolution algorithm**.
-* **Integration with v1.3.4 of [NCCL library](https://github.com/NVIDIA/nccl)**
+* **Integration with v2.2 of [NCCL library](https://github.com/NVIDIA/nccl)**
  for improved multi-GPU scaling.
 * **Optimized GPU memory management** for data and parameters storage, I/O buffers 
 and workspace for convolutional layers.
@@ -40,3 +40,11 @@ Please cite Caffe in your publications if it helps your research:
       Title = {Caffe: Convolutional Architecture for Fast Feature Embedding},
       Year = {2014}
     }
+
+## Useful notes
+
+The libturbojpeg library has been used since 0.16.5. It has a packaging bug; to work around it, please execute the following (required for Makefile builds, optional for CMake):
+```
+sudo apt-get install libturbojpeg libturbojpeg-dev
+sudo ln -s /usr/lib/x86_64-linux-gnu/libturbojpeg.so.0.1.0 /usr/lib/x86_64-linux-gnu/libturbojpeg.so
+```
@@ -1,7 +1,3 @@
-if(CPU_ONLY)
-  return()
-endif()
-
 # Known NVIDIA GPU achitectures Caffe can be compiled for.
 # This list will be used for CUDA_ARCH_NAME = All option
 set(Caffe_known_gpu_archs "30 35 50 52 60 61 70")
 
@@ -1,11 +1,8 @@
 # This list is required for static linking and exported to CaffeConfig.cmake
 set(Caffe_LINKER_LIBS "")
 
-find_package(PythonInterp ${python_version})
-
 # ---[ Boost
-find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem regex python-py${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR})
-set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}_FOUND})
+find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem regex)
 include_directories(SYSTEM ${Boost_INCLUDE_DIR})
 list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
 
@@ -64,12 +61,7 @@ list(APPEND Caffe_LINKER_LIBS ${JPEGTurbo_LIBRARIES})
 # ---[ CUDA
 include(cmake/Cuda.cmake)
 if(NOT HAVE_CUDA)
-  if(CPU_ONLY)
-    message(STATUS "-- CUDA is disabled. Building without it...")
-  else()
-    message(WARNING "-- CUDA is not detected by cmake. Building without it...")
-  endif()
-
+  message(SEND_ERROR "-- CUDA is not detected by cmake. CUDA is required since v0.17.")
   # TODO: remove this not cross platform define in future. Use caffe_config.h instead.
   add_definitions(-DCPU_ONLY)
 endif()
@@ -113,6 +105,20 @@ endif()
 
 # ---[ Python
 if(BUILD_python)
+  find_package(PythonInterp ${python_version})
+
+  find_library(Boost_PYTHON_FOUND NAMES
+          python-py${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}
+          boost_python-py${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}
+          boost_python${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}
+          PATHS ${LIBDIR})
+  if ("${Boost_PYTHON_FOUND}" STREQUAL "Boost_PYTHON_FOUND-NOTFOUND")
+    message(SEND_ERROR "Could NOT find Boost Python Library")
+  else()
+    message(STATUS "Found Boost Python Library ${Boost_PYTHON_FOUND}")
+    list(APPEND Caffe_LINKER_LIBS ${Boost_PYTHON_FOUND})
+  endif()
+
   find_package(PythonLibs ${python_version})
   find_package(NumPy 1.7.1)
   if(PYTHONLIBS_FOUND AND NUMPY_FOUND AND Boost_PYTHON_FOUND)
@@ -147,14 +153,12 @@ if(BUILD_docs)
 endif()
 
 # ---[ NCCL
-if(DEFINED USE_NCCL)
-  if(USE_NCCL AND NOT CPU_ONLY)
+if(USE_NCCL_SET)
+  if(USE_NCCL)
     find_package(NCCL REQUIRED)
   endif()
 else()
-  if(NOT CPU_ONLY)
-    find_package(NCCL)
-  endif()
+  find_package(NCCL)
 endif()
 if(NCCL_FOUND)
   add_definitions(-DUSE_NCCL)
@@ -163,7 +167,7 @@ if(NCCL_FOUND)
 endif()
 
 # ---[ NVML
-if(NOT CPU_ONLY AND NOT NO_NVML)
+if(NOT NO_NVML)
   find_package(NVML)
 endif()
 if(NVML_FOUND)
 
@@ -113,7 +113,6 @@ function(caffe_print_configuration_summary)
   caffe_status("  BUILD_python      :   ${BUILD_python}")
   caffe_status("  BUILD_matlab      :   ${BUILD_matlab}")
   caffe_status("  BUILD_docs        :   ${BUILD_docs}")
-  caffe_status("  CPU_ONLY          :   ${CPU_ONLY}")
   caffe_status("  USE_LEVELDB       :   ${USE_LEVELDB}")
   caffe_status("  USE_LMDB          :   ${USE_LMDB}")
   caffe_status("  ALLOW_LMDB_NOLOCK :   ${ALLOW_LMDB_NOLOCK}")
@@ -145,13 +144,14 @@ function(caffe_print_configuration_summary)
     else()
       caffe_status("  cuDNN             :   Disabled")
     endif()
-    if(NOT DEFINED USE_NCCL)
-      caffe_status("  NCCL              : " NCCL_FOUND THEN "Yes (ver. ${NCCL_VERSION})" ELSE "Not
-      found")
-    elseif(USE_NCCL)
-      caffe_status("  NCCL              : " NCCL_FOUND THEN "Yes (ver. ${NCCL_VERSION})" ELSE "Not found")
+    if(USE_NCCL_SET)
+      if(USE_NCCL)
+        caffe_status("  NCCL              : " NCCL_FOUND THEN "Yes (ver. ${NCCL_VERSION})" ELSE "Not found")
+      else()
+        caffe_status("  NCCL              :   Disabled")
+      endif()
     else()
-      caffe_status("  NCCL              :   Disabled")
+      caffe_status("  NCCL              : " NCCL_FOUND THEN "Yes (ver. ${NCCL_VERSION})" ELSE "Not found (not requested)")
     endif()
 
     if(NVML_FOUND)
 
@@ -49,7 +49,7 @@ Pycaffe and Matcaffe interfaces have their own natural needs.
 
 **cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v4; older versions are supported in older Caffe.
 
-**CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment.
+**CPU-only Caffe**: not supported since v0.17. In older versions, cold-brewed CPU-only Caffe can be built by uncommenting the `CPU_ONLY := 1` flag in `Makefile.config`, which configures and builds Caffe without CUDA. This is helpful for cloud or cluster deployment.
 
 ### CUDA and BLAS
 
 
@@ -46,12 +46,7 @@ Classifier::Classifier(const string& model_file,
                        const string& trained_file,
                        const string& mean_file,
                        const string& label_file) {
-#ifdef CPU_ONLY
-  Caffe::set_mode(Caffe::CPU);
-#else
   Caffe::set_mode(Caffe::GPU);
-#endif
-
   /* Load the network. */
   net_.reset(new Net(model_file, TEST));
   net_->CopyTrainedLayersFrom(trained_file);