
Commit 4e1b276

Merge pull request #382 from drnikolaev/caffe-0.16: 0.16.3 rc

2 parents: 22970ac + 5cc8515

66 files changed: +2095, -449 lines (large commit; only a subset of the changed files is shown below).

cmake/Cuda.cmake (+8, -11)

@@ -4,7 +4,7 @@ endif()
 
 # Known NVIDIA GPU achitectures Caffe can be compiled for.
 # This list will be used for CUDA_ARCH_NAME = All option
-set(Caffe_known_gpu_archs "20 21(20) 30 35 50 60 61 62")
+set(Caffe_known_gpu_archs "30 35 50 52 60 61 70")
 
 ################################################################################################
 # A function for automatic detection of GPUs installed (if autodetection is enabled)
@@ -36,8 +36,7 @@ function(caffe_detect_installed_gpus out_variable)
                     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
 
     if(__nvcc_res EQUAL 0)
-      string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
-      set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from caffe_detect_gpus tool" FORCE)
+      set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architectures from caffe_detect_gpus tool" FORCE)
    endif()
  endif()
 
@@ -56,22 +55,22 @@ endfunction()
 # caffe_select_nvcc_arch_flags(out_variable)
 function(caffe_select_nvcc_arch_flags out_variable)
   # List of arch names
-  set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual")
+  set(__archs_names "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual")
   set(__archs_name_default "All")
   if(NOT CMAKE_CROSSCOMPILING)
     list(APPEND __archs_names "Auto")
     set(__archs_name_default "Auto")
   endif()
 
   # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
-  set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.")
+  set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU architecture.")
   set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names} )
   mark_as_advanced(CUDA_ARCH_NAME)
 
   # verify CUDA_ARCH_NAME value
   if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
     string(REPLACE ";" ", " __archs_names "${__archs_names}")
-    message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.")
+    message(FATAL_ERROR "Only ${__archs_names} architecture names are supported.")
   endif()
 
   if(${CUDA_ARCH_NAME} STREQUAL "Manual")
@@ -83,14 +82,12 @@ function(caffe_select_nvcc_arch_flags out_variable)
     unset(CUDA_ARCH_PTX CACHE)
   endif()
 
-  if(${CUDA_ARCH_NAME} STREQUAL "Fermi")
-    set(__cuda_arch_bin "20 21(20)")
-  elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler")
+  if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
    set(__cuda_arch_bin "30 35")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
-    set(__cuda_arch_bin "50")
+    set(__cuda_arch_bin "50 52")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
-    set(__cuda_arch_bin "60 61 62")
+    set(__cuda_arch_bin "60 61")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
     set(__cuda_arch_bin "70")
   elseif(${CUDA_ARCH_NAME} STREQUAL "All")
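In short: Fermi (compute 2.x) disappears from the known-arch list, Maxwell gains sm_52, Pascal loses sm_62, and Volta (sm_70) is added. The Auto path still depends on caffe_detect_installed_gpus, which appears to compile and run a small CUDA probe through nvcc and cache its output (the __nvcc_out handling above). A minimal sketch of such a probe, assuming only the standard CUDA runtime API rather than quoting the shipped tool:

    // Sketch of a GPU probe like the one caffe_detect_gpus builds:
    // prints each visible device's compute capability as "major.minor".
    #include <cstdio>
    #include <cuda_runtime.h>

    int main() {
      int count = 0;
      if (cudaGetDeviceCount(&count) != cudaSuccess) return -1;
      for (int device = 0; device < count; ++device) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, device) == cudaSuccess)
          std::printf("%d.%d ", prop.major, prop.minor);
      }
      return 0;
    }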

include/caffe/blob.hpp (+6, -12)

@@ -94,24 +94,16 @@ class Blob {
     return diff_tensor_ ? diff_tensor_->type() : last_diff_type_;
   }
 
-  void lock_data() {
-    data_tensor_->lock_tensor();
-  }
-
-  void lock_diff() {
-    diff_tensor_->lock_tensor();
-  }
-
   bool diff_equals(const Blob& other) const {
     return diff_tensor_ == other.diff_tensor_;
   }
 
   void allocate_data(bool on_gpu = true) {
-    data_tensor_->mutable_memory(data_tensor_->type(), on_gpu);
+    data_tensor_->current_memory(on_gpu);
   }
 
   void allocate_diff(bool on_gpu = true) {
-    diff_tensor_->mutable_memory(diff_tensor_->type(), on_gpu);
+    diff_tensor_->current_memory(on_gpu);
   }
 
   size_t cpu_memory_data_use() const;
@@ -451,9 +443,11 @@ class Blob {
    */
   void ShareDiff(const Blob& other);
 
-
   template<typename Dtype>
-  void ToProto(BlobProto* proto, bool write_diff = false) const;
+  void ToProto(BlobProto* proto, bool store_in_old_format, bool write_diff = false) const;
+  template<typename Dtype>
+  void ToProtoBVLC(BlobProto* proto, bool write_diff = false) const;
+
   void FromProto(const BlobProto& proto, bool reshape = true);
   bool ShapeEquals(const BlobProto& other);
   std::string to_string(int indent = 0) const;  // debug helper
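ToProto() now takes an explicit store_in_old_format flag, and the separate ToProtoBVLC() keeps the upstream BVLC serialization for interoperability. A hedged usage sketch; the helper function and the flag values chosen are illustrative, not from this commit:

    #include "caffe/blob.hpp"
    #include "caffe/proto/caffe.pb.h"

    // Illustrative helper: serialize a blob in the fork's format, or in
    // the BVLC layout when upstream tools need to read the result.
    void save_blob(const caffe::Blob& blob, caffe::BlobProto* proto, bool for_bvlc) {
      if (for_bvlc)
        blob.ToProtoBVLC<float>(proto, /*write_diff=*/false);
      else
        blob.ToProto<float>(proto, /*store_in_old_format=*/false, /*write_diff=*/false);
    }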

include/caffe/common.hpp (+9, -1)

@@ -441,7 +441,15 @@ class Caffe {
 #endif
   }
 
-  static int thread_count() {
+#ifndef CPU_ONLY
+  /**
+   * Minimum memory available across all deviced currently used
+   * @return size_t
+   */
+  static size_t min_avail_device_memory();
+#endif
+
+  static int thread_count() {
     return thread_count_;
   }
 
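min_avail_device_memory() reports the tightest free-memory figure across the devices in use, presumably so allocators can size workspaces conservatively in multi-GPU runs. A minimal usage sketch under that assumption (the helper and its threshold are illustrative):

    #ifndef CPU_ONLY
    #include "caffe/common.hpp"

    // Illustrative check: does a proposed workspace fit on every device
    // currently in use?
    bool fits_on_all_devices(size_t workspace_bytes) {
      return workspace_bytes <= caffe::Caffe::min_avail_device_memory();
    }
    #endif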

include/caffe/data_transformer.hpp (+66)

@@ -1,6 +1,12 @@
 #ifndef CAFFE_DATA_TRANSFORMER_HPP
 #define CAFFE_DATA_TRANSFORMER_HPP
 
+#ifdef USE_OPENCV
+
+#include <opencv2/core/core.hpp>
+
+#endif // USE_OPENCV
+
 #include <string>
 #include <vector>
 
@@ -49,6 +55,46 @@ class DataTransformer {
   void CopyPtrEntry(shared_ptr<Datum> datum, Dtype* transformed_ptr, size_t& out_sizeof_element,
       bool output_labels, Dtype* label);
 
+#ifdef USE_OPENCV
+  /**
+   * @brief Whether there are any "variable_sized" transformations defined
+   * in the data layer's transform_param block.
+   */
+  bool var_sized_transforms_enabled() const;
+
+  /**
+   * @brief Calculate the final shape from applying the "variable_sized"
+   * transformations defined in the data layer's transform_param block
+   * on the provided image, without actually performing any transformations.
+   *
+   * @param orig_shape
+   *    The shape of the data to be transformed.
+   */
+  vector<int> var_sized_transforms_shape(const vector<int>& orig_shape) const;
+
+  /**
+   * @brief Applies "variable_sized" transformations defined in the data layer's
+   * transform_param block to the data.
+   *
+   * @param old_datum
+   *    The source Datum containing data of arbitrary shape.
+   * @param new_datum
+   *    The destination Datum that will store transformed data of a fixed
+   *    shape. Suitable for other transformations.
+   */
+  shared_ptr<Datum> VariableSizedTransforms(shared_ptr<Datum> old_datum);
+
+  bool var_sized_image_random_resize_enabled() const;
+  vector<int> var_sized_image_random_resize_shape(const vector<int>& prev_shape) const;
+  cv::Mat& var_sized_image_random_resize(cv::Mat& img);
+  bool var_sized_image_random_crop_enabled() const;
+  vector<int> var_sized_image_random_crop_shape(const vector<int>& prev_shape) const;
+  cv::Mat& var_sized_image_random_crop(const cv::Mat& img);
+  bool var_sized_image_center_crop_enabled() const;
+  vector<int> var_sized_image_center_crop_shape(const vector<int>& prev_shape) const;
+  cv::Mat& var_sized_image_center_crop(const cv::Mat& img);
+#endif
+
   /**
    * @brief Applies the transformation defined in the data layer's
    * transform_param block to the data.
@@ -137,6 +183,20 @@ class DataTransformer {
       const std::array<unsigned int, 3>& rand);
 #endif // USE_OPENCV
 
+  vector<int> InferDatumShape(const Datum& datum);
+#ifdef USE_OPENCV
+  vector<int> InferCVMatShape(const cv::Mat& img);
+#endif // USE_OPENCV
+
+  /**
+   * @brief Infers the shape of transformed_blob will have when
+   * the transformation is applied to the data.
+   *
+   * @param bottom_shape
+   *    The shape of the data to be transformed.
+   */
+  vector<int> InferBlobShape(const vector<int>& bottom_shape, bool use_gpu = false);
+
   /**
    * @brief Infers the shape of transformed_blob will have when
    * the transformation is applied to the data.
@@ -180,6 +240,12 @@ class DataTransformer {
 #ifndef CPU_ONLY
   GPUMemory::Workspace mean_values_gpu_;
 #endif
+#ifdef USE_OPENCV
+  cv::Mat varsz_orig_img_;
+  cv::Mat varsz_rand_resize_img_;
+  cv::Mat varsz_rand_crop_img_;
+  cv::Mat varsz_center_crop_img_;
+#endif
 };
 
 }  // namespace caffe
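The new hooks let a data layer normalize arbitrarily sized images (random resize, random crop, center crop) to a fixed shape before the standard transform runs, with the cached cv::Mat members avoiding per-image reallocation. A hedged sketch of the intended call pattern; the driver function is an assumption, only VariableSizedTransforms and the enabled-check come from the header above:

    #ifdef USE_OPENCV
    #include "caffe/data_transformer.hpp"

    namespace caffe {

    // Illustrative driver: reduce a variable-sized datum to the fixed
    // shape the regular Transform() path expects.
    template<typename Dtype>
    shared_ptr<Datum> normalize_shape(DataTransformer<Dtype>& dt,
                                      shared_ptr<Datum> datum) {
      if (dt.var_sized_transforms_enabled())
        datum = dt.VariableSizedTransforms(datum);  // resize/crop to fixed shape
      return datum;
    }

    }  // namespace caffe
    #endif // USE_OPENCV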

include/caffe/layer.hpp (+3, -13)

@@ -129,8 +129,6 @@ class LayerBase {
    */
   virtual inline const char* type() const { return ""; }
 
-  virtual bool bias_term() const { return false; }  // FIXME
-
   /**
    * @brief Returns the layer name.
    */
@@ -141,6 +139,7 @@ class LayerBase {
   // Iteration counter maintained by Solver
   int iter() const;
   int relative_iter() const;
+  int iterations_sized() const;
 
   void set_solver_rank(size_t solver_rank) {
     solver_rank_ = solver_rank;
@@ -385,6 +384,8 @@ class LayerBase {
    */
   virtual void ToProto(LayerParameter* param, bool write_diff = false) = 0;
 
+  std::string print_current_device() const;
+
  protected:
   /** The vector that stores the learnable parameters as a set of blobs. */
   vector<shared_ptr<Blob>> blobs_;
@@ -610,17 +611,6 @@ Layer<Ftype, Btype>::Backward(const vector<Blob*>& top, const vector<bool>& prop
   }
 }
 
-// Serialize LayerParameter to protocol buffer
-template<typename Ftype, typename Btype>
-void Layer<Ftype, Btype>::ToProto(LayerParameter* param, bool write_diff) {
-  param->Clear();
-  param->CopyFrom(layer_param_);
-  param->clear_blobs();
-  for (int i = 0; i < blobs_.size(); ++i) {
-    blobs_[i]->ToProto<Btype>(param->add_blobs(), write_diff);
-  }
-}
-
 }  // namespace caffe
 
 #endif  // CAFFE_LAYER_H_
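The inline Layer::ToProto definition is removed from the header, presumably relocated to a source file so it can pass Blob's new store_in_old_format flag (see blob.hpp above). A hedged sketch of what the relocated body might look like; this is hypothetical, including the flag value:

    // Hypothetical relocated implementation (not quoted from this commit):
    template<typename Ftype, typename Btype>
    void Layer<Ftype, Btype>::ToProto(LayerParameter* param, bool write_diff) {
      param->Clear();
      param->CopyFrom(layer_param_);
      param->clear_blobs();
      for (int i = 0; i < blobs_.size(); ++i) {
        blobs_[i]->ToProto<Btype>(param->add_blobs(),
            /*store_in_old_format=*/false, write_diff);  // assumed default
      }
    }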

include/caffe/layers/base_conv_layer.hpp (-1)

@@ -27,7 +27,6 @@ class BaseConvolutionLayer : public Layer<Ftype, Btype> {
   virtual inline int MinBottomBlobs() const { return 1; }
   virtual inline int MinTopBlobs() const { return 1; }
   virtual inline bool EqualNumBottomTopBlobs() const { return true; }
-  bool bias_term() const override { return bias_term_; }
 
  protected:
   // Helper functions that abstract away the column buffer and gemm arguments.

include/caffe/layers/base_data_layer.hpp (+1)

@@ -119,6 +119,7 @@ class BasePrefetchingDataLayer : public BaseDataLayer<Ftype, Btype>, public Inte
   void InternalThreadEntry() override;
   void InternalThreadEntryN(size_t thread_id) override;
   void ResizeQueues();
+  void AllocatePrefetch();
 
   virtual void InitializePrefetch();
   virtual void load_batch(Batch<Ftype>* batch, int thread_id, size_t queue_id) = 0;

include/caffe/layers/cudnn_conv_layer.hpp (+10, -1)

@@ -58,7 +58,11 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Ftype, Btype> {
       : ConvolutionLayer<Ftype, Btype>(param), handles_setup_(false),
         use_algo_seeker_(true), use_modest_workspace_(true),
         forward_math_(tpmax<Ftype, float>()), backward_data_math_(tpmax<Btype, float>()),
-        backward_filter_math_(tpmax<Btype, float>()) {}
+        backward_filter_math_(tpmax<Btype, float>()) {
+#if CUDNN_VERSION_MIN(7, 0, 0)
+    cudnn_math_override_ = -1;
+#endif
+  }
 
   virtual void LayerSetUp(const vector<Blob*>& bottom, const vector<Blob*>& top);
   virtual void Reshape(const vector<Blob*>& bottom, const vector<Blob*>& top);
@@ -77,6 +81,11 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Ftype, Btype> {
   vector<cudnnConvolutionBwdFilterAlgo_t> bwd_filter_algo_;
   vector<cudnnConvolutionBwdDataAlgo_t> bwd_data_algo_;
 
+#if CUDNN_VERSION_MIN(7, 0, 0)
+  int cudnn_math_override_;
+  vector<cudnnMathType_t> fwd_cudnn_math_, bwd_filter_cudnn_math_, bwd_data_cudnn_math_;
+#endif
+
   vector<cudnnTensorDescriptor_t> fwd_bottom_descs_, fwd_top_descs_;
   vector<cudnnTensorDescriptor_t> bwd_bottom_descs_, bwd_top_descs_;
   cudnnTensorDescriptor_t fwd_bias_desc_, bwd_bias_desc_;
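cudnn_math_override_ and the per-algorithm cudnnMathType_t vectors target cuDNN 7's Tensor Core ("tensor op") math on Volta; the -1 default presumably means "no override". A minimal sketch of the underlying cuDNN call, under that assumption:

    #include <cudnn.h>

    // Opt a convolution descriptor into Tensor Core math (cuDNN 7+).
    // Descriptor setup and error handling elided for brevity.
    void enable_tensor_ops(cudnnConvolutionDescriptor_t conv_desc) {
    #if CUDNN_MAJOR >= 7
      cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH);
    #endif
    }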

include/caffe/layers/embed_layer.hpp (-1)

@@ -29,7 +29,6 @@ class EmbedLayer : public Layer<Ftype, Btype> {
   virtual inline const char* type() const { return "Embed"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
-  bool bias_term() const override { return bias_term_; }
 
  protected:
   virtual void Forward_cpu(const vector<Blob*>& bottom,

include/caffe/layers/inner_product_layer.hpp (-1)

@@ -28,7 +28,6 @@ class InnerProductLayer : public Layer<Ftype, Btype> {
   virtual inline const char* type() const { return "InnerProduct"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
-  bool bias_term() const override { return bias_term_; }
 
  protected:
   virtual void Forward_cpu(const vector<Blob*>& bottom,

include/caffe/layers/scale_layer.hpp (-1)

@@ -35,7 +35,6 @@ class ScaleLayer: public Layer<Ftype, Btype> {
   virtual inline int MinBottomBlobs() const { return 1; }
   virtual inline int MaxBottomBlobs() const { return 2; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
-  bool bias_term() const override { return bias_term_; }
 
  protected:
   /**

include/caffe/layers/softmax_loss_layer.hpp (+4, -2)

@@ -52,7 +52,9 @@ class SoftmaxWithLossLayer : public LossLayer<Ftype, Btype> {
    * present; otherwise the loss is simply summed over spatial locations.
    */
   explicit SoftmaxWithLossLayer(const LayerParameter& param)
-      : LossLayer<Ftype, Btype>(param) {}
+      : LossLayer<Ftype, Btype>(param) {
+    prob_ = Blob::create<Ftype>();
+  }
   virtual void LayerSetUp(const vector<Blob*>& bottom,
       const vector<Blob*>& top);
   virtual void Reshape(const vector<Blob*>& bottom,
@@ -110,7 +112,7 @@ class SoftmaxWithLossLayer : public LossLayer<Ftype, Btype> {
   /// The internal SoftmaxLayer used to map predictions to a distribution.
   shared_ptr<Layer<Ftype, Btype> > softmax_layer_;
   /// prob stores the output probability predictions from the SoftmaxLayer.
-  TBlob<Ftype> prob_;  // Conversion if Ftype!=Btype
+  shared_ptr<Blob> prob_;  // Conversion if Ftype!=Btype
   /// bottom vector holder used in call to the underlying SoftmaxLayer::Forward
   vector<Blob*> softmax_bottom_vec_;
   /// top vector holder used in call to the underlying SoftmaxLayer::Forward
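prob_ changes from a concrete TBlob<Ftype> member to a polymorphic Blob built through the Blob::create<Dtype>() factory shown in the constructor, which lets the element type be chosen at construction time. A one-function sketch of the pattern (the wrapper is illustrative):

    #include "caffe/blob.hpp"

    namespace caffe {
    // Illustrative: the factory returns a type-erased Blob whose element
    // type (here float) is fixed when the blob is created.
    shared_ptr<Blob> make_prob_blob() {
      return Blob::create<float>();
    }
    }  // namespace caffe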

include/caffe/net.hpp (+9, -2)

@@ -200,6 +200,11 @@ class Net {
   const vector<string>& param_display_names() const {
     return param_display_names_;
   }
+
+  const pair<int, int>& param_layer_indices(int param_id) {
+    return param_layer_indices_[param_id];
+  }
+
   /// @brief Input and output blob numbers
   int num_inputs() const { return net_input_blobs_.size(); }
   int num_outputs() const { return net_output_blobs_.size(); }
@@ -252,8 +257,6 @@ class Net {
   void InitializeLearnableDiffSpace();
 #endif
 
-  size_t total_batch_size() const;
-
   void wait_layers_init() {
     for (Flag* flag : layer_inititialized_flags_) {
       flag->wait();
@@ -264,6 +267,9 @@ class Net {
     return global_grad_scale_;
   }
 
+  size_t infer_count() const {
+    return infer_count_;
+  }
 
  protected:
   // Helpers for Init.
@@ -378,6 +384,7 @@ class Net {
   vector<Flag*> layer_inititialized_flags_;
   NetParameter net_param_;
 
+  size_t infer_count_;
   float global_grad_scale_;
 
   static constexpr int END_OF_ITERATION = -1;
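param_layer_indices() exposes, per learnable parameter, the pair stored in param_layer_indices_; in Caffe that conventionally means (owning layer index, parameter index within that layer), and infer_count_ appears to count inference passes. A hedged usage sketch; the helper and the pair semantics are assumptions:

    #include "caffe/net.hpp"

    namespace caffe {
    // Illustrative inspection helper; pair meaning follows the usual
    // Caffe convention (layer id, local param index within that layer).
    void describe_param(Net& net, int param_id) {
      const std::pair<int, int>& idx = net.param_layer_indices(param_id);
      LOG(INFO) << "param " << param_id << " belongs to layer " << idx.first
                << " (local index " << idx.second << "); net has run "
                << net.infer_count() << " inference passes";
    }
    }  // namespace caffe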
