diff --git a/examples/faster-rcnn/license.txt b/examples/faster-rcnn/license.txt index 617c1e140..31b20cd2a 100644 --- a/examples/faster-rcnn/license.txt +++ b/examples/faster-rcnn/license.txt @@ -1,8 +1,83 @@ -The MIT License -Copyright (c) +Faster R-CNN -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +The MIT License (MIT) + +Copyright (c) 2015 Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +************************************************************************ + +THIRD-PARTY SOFTWARE NOTICES AND INFORMATION + +This project, Faster R-CNN, incorporates material from the project(s) +listed below (collectively, "Third Party Code"). Microsoft is not the +original author of the Third Party Code. The original copyright notice +and license under which Microsoft received such Third Party Code are set +out below. This Third Party Code is licensed to you under their original +license terms set forth below. Microsoft reserves all other rights not +expressly granted, whether by implication, estoppel or otherwise. + +1. Caffe, (https://github.com/BVLC/caffe/) + +COPYRIGHT + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. + +Caffe uses a shared copyright model: each contributor holds copyright +over their contributions to Caffe. The project versioning records all +such contribution and copyright details. If a contributor wants to +further mark their specific copyright on a particular contribution, +they should indicate their copyright solely in the commit message of +the change when it is committed. + +The BSD 2-Clause License + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/examples/rfcn/license.txt b/examples/rfcn/license.txt index 617c1e140..8d734011c 100644 --- a/examples/rfcn/license.txt +++ b/examples/rfcn/license.txt @@ -1,8 +1,23 @@ -The MIT License -Copyright (c) +MIT License -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Copyright (c) 2016 Yuwen Xiong + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/include/caffe/layers/base_conv_layer.hpp b/include/caffe/layers/base_conv_layer.hpp index 59cb1bf28..40ad8afb8 100755 --- a/include/caffe/layers/base_conv_layer.hpp +++ b/include/caffe/layers/base_conv_layer.hpp @@ -221,12 +221,12 @@ class BaseConvolutionLayer : public Layer { } #endif - int num_kernels_im2col_; - int num_kernels_col2im_; - int conv_out_channels_; - int conv_in_channels_; - int conv_out_spatial_dim_; - int kernel_dim_; + size_t num_kernels_im2col_; + size_t num_kernels_col2im_; + size_t conv_out_channels_; + size_t conv_in_channels_; + size_t conv_out_spatial_dim_; + size_t kernel_dim_; size_t col_offset_; size_t output_offset_; diff --git a/include/caffe/layers/mkldnn_layers.hpp b/include/caffe/layers/mkldnn_layers.hpp index aca64c362..dae314208 100644 --- a/include/caffe/layers/mkldnn_layers.hpp +++ b/include/caffe/layers/mkldnn_layers.hpp @@ -141,7 +141,7 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer, public Layer { shared_ptr input_primitive, bwd_top_diff_primitive; - int32_t num_, width_, height_, channels_; + vector shape_; Dtype eps_, moving_average_fraction_; bool use_weight_bias_, bias_term_, use_global_stats_; int num_stats_batches_; @@ -402,7 +402,7 @@ class MKLDNNReLULayer : public MKLDNNLayer , public NeuronLayer { , reluFwd_pd(), reluBwd_pd() , fwd_top_data_memory(), bwd_bottom_diff_memory() , fwd_bottom_data_primitive(), bwd_top_diff_primitive() - , num_(0), width_(0), height_(0), channels_(0) + , shape_(0) { PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); @@ -431,7 +431,7 @@ class MKLDNNReLULayer : public MKLDNNLayer , public NeuronLayer { MKLDNNPrimitive reluFwd, reluBwd; shared_ptr fwd_top_data_memory, bwd_bottom_diff_memory; shared_ptr fwd_bottom_data_primitive, bwd_top_diff_primitive, bwd_bottom_data_primitive; - int32_t num_, width_, height_, channels_; + vector shape_; PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); @@ -480,7 +480,8 @@ class MKLDNNConcatLayer : public MKLDNNLayer , public Layer { vector split_dims; bool in_place_; - int32_t num_, width_, height_, channels_, num_concats_; + int32_t num_concats_; + vector shape_; int concat_dimension; PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); @@ -537,7 +538,7 @@ class MKLDNNEltwiseLayer : public MKLDNNLayer , public Layer { , eltwiseFwd_pd() , fwd_top_data_memory() , fwd_bottom_data_primitives_() - , num_(0), width_(0), height_(0), channels_(0) + , shape_(0) , num_bottoms_(0) { PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); @@ -573,7 +574,7 @@ class MKLDNNEltwiseLayer : public MKLDNNLayer , public Layer { EltwiseParameter_EltwiseOp op_; vector coeffs_; Blob max_idx_; - int32_t num_, width_, height_, channels_; + vector shape_; int32_t num_bottoms_; bool stable_prod_grad_; diff --git a/src/caffe/layers/mkldnn_batch_norm_layer.cpp b/src/caffe/layers/mkldnn_batch_norm_layer.cpp index d3f76bfbd..b19ab7bab 100644 --- a/src/caffe/layers/mkldnn_batch_norm_layer.cpp +++ b/src/caffe/layers/mkldnn_batch_norm_layer.cpp @@ -64,10 +64,8 @@ void MKLDNNBatchNormLayer::LayerSetUp(const vector*>& bottom Layer::LayerSetUp(bottom, top); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); - num_ = bottom[0]->num(); + shape_ = bottom[0]->shape(); + const int channels = shape_[1]; eps_ = this->layer_param_.batch_norm_param().eps(); use_weight_bias_ = this->layer_param_.batch_norm_param().use_weight_bias(); @@ -77,12 +75,12 @@ void MKLDNNBatchNormLayer::LayerSetUp(const vector*>& bottom if (this->layer_param_.batch_norm_param().has_use_global_stats()) use_global_stats_ = this->layer_param_.batch_norm_param().use_global_stats(); - InitStatsBatchVars(num_); + InitStatsBatchVars(shape_[0]); this->blobs_.resize(3 + (use_weight_bias_ ? 1:0) + (use_weight_bias_ && bias_term_ ? 1:0)); vector sz; - sz.push_back(channels_); + sz.push_back(channels); this->blobs_[0].reset(new Blob(sz)); this->blobs_[1].reset(new Blob(sz)); sz[0]=1; @@ -96,7 +94,7 @@ void MKLDNNBatchNormLayer::LayerSetUp(const vector*>& bottom //Optimization: use the temp blob to combine the scale and shift together. Avoid the additional copies. // Initialize scale and shift combination blob vector scaleshift_blob_shape(1); - scaleshift_blob_shape[0] = 2*channels_; + scaleshift_blob_shape[0] = 2*channels; scaleshift_blob_.reset(new Blob(scaleshift_blob_shape)); //Should initialize the scaleshift_blob_ buffer to 0, because when bias_term_ == false, need to pass zero bias to MKLDNN caffe_set(scaleshift_blob_shape[0], static_cast(0), @@ -111,8 +109,8 @@ void MKLDNNBatchNormLayer::LayerSetUp(const vector*>& bottom if (use_weight_bias_) { // Initialize scale and shift vector scaleshift_shape(1); - scaleshift_shape[0] = channels_; - VLOG(1) << "MKLDNNBatchNormLayer::LayerSetUp: channels_ = " << channels_; + scaleshift_shape[0] = channels; + VLOG(1) << "MKLDNNBatchNormLayer::LayerSetUp: channels_ = " << channels; this->blobs_[3].reset(new Blob(scaleshift_shape)); this->blobs_[3]->set_cpu_data(scaleshift_blob_->mutable_cpu_data()); @@ -128,8 +126,8 @@ void MKLDNNBatchNormLayer::LayerSetUp(const vector*>& bottom if (bias_term_) { this->blobs_[4].reset(new Blob(scaleshift_shape)); - this->blobs_[4]->set_cpu_data(scaleshift_blob_->mutable_cpu_data() + scaleshift_blob_->offset(channels_)); - this->blobs_[4]->set_cpu_diff(scaleshift_diff_blob->mutable_cpu_diff() + scaleshift_blob_->offset(channels_)); + this->blobs_[4]->set_cpu_data(scaleshift_blob_->mutable_cpu_data() + scaleshift_blob_->offset(channels)); + this->blobs_[4]->set_cpu_diff(scaleshift_diff_blob->mutable_cpu_diff() + scaleshift_blob_->offset(channels)); FillerParameter bias_filler_param(this->layer_param_.batch_norm_param().bias_filler()); if (!this->layer_param_.batch_norm_param().has_bias_filler()) { bias_filler_param.set_type("constant"); @@ -161,17 +159,10 @@ void MKLDNNBatchNormLayer::Reshape(const vector*>& bottom { VLOG(1) << "MKLDNNBatchNormLayer::Reshape: " << this->layer_param_.name(); - this->reshape = (this->width_ == bottom[0]->width() && - this->height_ == bottom[0]->height() && - this->channels_ == bottom[0]->channels() && - this->num_ == bottom[0]->num()) ? false : true; + this->reshape = (this->shape_ == bottom[0]->shape()) ? false : true; + this->shape_ = bottom[0]->shape(); - this->width_ = bottom[0]->width(); - this->height_ = bottom[0]->height(); - this->num_ = bottom[0]->num(); - this->channels_ = bottom[0]->channels(); - - InitStatsBatchVars(this->num_); + InitStatsBatchVars(this->shape_[0]); //Fix: should reshape the top blob with the real size of bottom blob //top[0]->Reshape(this->num_, this->channels_, this->height_, this->width_); @@ -194,10 +185,19 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott if (use_weight_bias_) flags |= use_scale_shift; if (use_global_stats_) flags |= use_global_stats; - int32_t n = this->num_; - int32_t iw = this->width_; - int32_t ih = this->height_; - int32_t ic = this->channels_; + memory::format src_mfmt; + auto tensor_size = this->shape_.size(); + memory::dims dim = this->shape_; + if(tensor_size == 5) { + src_mfmt = memory::format::ncdhw; + } else { + CHECK_LE(tensor_size, 4) + << "mkldnn batch normalization layer only supports dim size <= 5!"; + if (tensor_size < 4) dim.resize(4, 1); // extend to nchw with dim 1 to match mkldnn format + src_mfmt = memory::format::nchw; + } + + const int channels = this->shape_[1]; bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); @@ -216,13 +216,13 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott usr_mpd = mem_descr->usr_memory_pd(); prv_mpd = mem_descr->prv_memory_pd(); } else { - input_md.reset(new memory::desc({{n, ic, ih, iw}}, mpcsn, memory::format::nchw)); //MKLDNN batch norm only support 4D memory descriptor! + input_md.reset(new memory::desc({dim}, mpcsn, src_mfmt)); usr_mpd.reset(new memory::primitive_desc(*input_md, cpu_engine)); } output_md = input_md; input_stats_md.reset(new memory::desc(*input_md)); CHECK(input_stats_md->data.ndims > 0 && - input_stats_md->data.dims[0] == this->num_); + input_stats_md->data.dims[0] == this->shape_[0]); input_stats_md->data.dims[0] = stats_batch_size_; // ---- Initialize BatchNorm primitive descriptor ------------- @@ -262,7 +262,7 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott if (use_weight_bias_) { //For test in train, memory address of blobs_[3] and blobs_[4] will be changed when share data from train net. If the address // of blobs_[3] and blobs_[4] are continued, we will use them immediately, otherwise we will copy them to scaleshift_blob_ in Forward. - if((this->blobs_[3]->mutable_cpu_data() + this->blobs_[3]->offset(channels_)) == this->blobs_[4]->mutable_cpu_data()){ + if((this->blobs_[3]->mutable_cpu_data() + this->blobs_[3]->offset(channels)) == this->blobs_[4]->mutable_cpu_data()){ scaleshift_memory.reset(new memory(BatchNormFwd_pd->weights_primitive_desc(), this->blobs_[3]->mutable_cpu_data())); }else { scaleshift_memory.reset(new memory(BatchNormFwd_pd->weights_primitive_desc(), this->scaleshift_blob_->mutable_cpu_data())); @@ -309,8 +309,8 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott LOG(INFO) << "MKLDNN batch norm only support 4D memory descriptor! Use 4D for calculation and reshape to 2D for output!"; #endif vector top_shape; - top_shape.push_back(bottom[0]->num()); - top_shape.push_back(bottom[0]->channels()); + top_shape.push_back(bottom[0]->shape(0)); + top_shape.push_back(bottom[0]->shape(1)); top[0]->Reshape(top_shape); } } @@ -319,12 +319,15 @@ template template shared_ptr MKLDNNBatchNormLayer::GetStatsBatchMemory( shared_ptr > mkldnn_mem, int idx) { - long data_offset = - idx * stats_batch_size_ * this->channels_ * this->width_ * this->height_; + int length = this->shape_[1]; + for(int i=2;ishape_.size();i++) + length *= this->shape_[i]; + + long data_offset = idx * stats_batch_size_ * length; engine cpu_engine = CpuEngine::Instance().get_engine(); shared_ptr stats_md = mkldnn_mem->get_memory_desc(); CHECK(stats_md->data.ndims > 0 && - stats_md->data.dims[0] == this->num_); + stats_md->data.dims[0] == this->shape_[0]); stats_md->data.dims[0] = stats_batch_size_; shared_ptr stats_mpd( new memory::primitive_desc(*stats_md, cpu_engine)); @@ -338,6 +341,8 @@ void MKLDNNBatchNormLayer::InitBatchNormFwdPrimitive(int idx) { input_stats[idx] = GetStatsBatchMemory(fwd_bottom_data, idx); output_stats[idx] = GetStatsBatchMemory(fwd_top_data, idx); + const int channels = this->shape_[1]; + // ---- Create BatchNorm -------------------- if (this->phase_ == TEST && !use_global_stats_) { if (use_weight_bias_) { @@ -353,9 +358,9 @@ void MKLDNNBatchNormLayer::InitBatchNormFwdPrimitive(int idx) { variance_memory[idx].reset(new memory(BatchNormFwd_pd->variance_primitive_desc())); if (use_global_stats_) { - caffe_copy(this->channels_, this->blobs_[0]->cpu_data(), + caffe_copy(channels, this->blobs_[0]->cpu_data(), static_cast(mean_memory[idx]->get_data_handle())); - caffe_copy(this->channels_, this->blobs_[1]->cpu_data(), + caffe_copy(channels, this->blobs_[1]->cpu_data(), static_cast(variance_memory[idx]->get_data_handle())); if (use_weight_bias_) { BatchNormFwd[idx].reset(new batch_normalization_forward(*BatchNormFwd_pd, @@ -398,9 +403,11 @@ void MKLDNNBatchNormLayer::Forward_cpu(const vector*>& bottom // update top that head at prv fwd_top_data->sync_before_write(); - if((this->blobs_[3]->mutable_cpu_data() + this->blobs_[3]->offset(channels_)) != this->blobs_[4]->mutable_cpu_data()){ - caffe_copy(channels_, this->blobs_[3]->cpu_data(), this->scaleshift_blob_->mutable_cpu_data()); - caffe_copy(channels_, this->blobs_[4]->cpu_data(), this->scaleshift_blob_->mutable_cpu_data() + scaleshift_blob_->offset(channels_)); + const int channels = this->shape_[1]; + + if((this->blobs_[3]->mutable_cpu_data() + this->blobs_[3]->offset(channels)) != this->blobs_[4]->mutable_cpu_data()){ + caffe_copy(channels, this->blobs_[3]->cpu_data(), this->scaleshift_blob_->mutable_cpu_data()); + caffe_copy(channels, this->blobs_[4]->cpu_data(), this->scaleshift_blob_->mutable_cpu_data() + scaleshift_blob_->offset(channels)); } for (int stats_batch_idx = 0; stats_batch_idx < num_stats_batches_; stats_batch_idx++) { @@ -429,11 +436,11 @@ void MKLDNNBatchNormLayer::Forward_cpu(const vector*>& bottom Dtype *variance_buffer_ = (Dtype *)(variance_memory[stats_batch_idx]->get_data_handle()); this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_; this->blobs_[2]->mutable_cpu_data()[0] += 1; - caffe_cpu_axpby(this->channels_, Dtype(1), mean_buffer_, + caffe_cpu_axpby(channels, Dtype(1), mean_buffer_, moving_average_fraction_, this->blobs_[0]->mutable_cpu_data()); - int m = bottom[0]->count()/num_stats_batches_/channels_; + int m = bottom[0]->count()/num_stats_batches_/channels; Dtype bias_correction_factor = m > 1 ? Dtype(m)/(m-1) : 1; - caffe_cpu_axpby(this->channels_, bias_correction_factor, + caffe_cpu_axpby(channels, bias_correction_factor, variance_buffer_, moving_average_fraction_, this->blobs_[1]->mutable_cpu_data()); } @@ -450,10 +457,17 @@ void MKLDNNBatchNormLayer::InitBatchNormBwd( { if (std::is_same::value) NOT_IMPLEMENTED; - int32_t n = this->num_; - int32_t w = this->width_; - int32_t h = this->height_; - int32_t c = this->channels_; + memory::format src_mfmt; + auto tensor_size = this->shape_.size(); + memory::dims dim = this->shape_; + if(tensor_size == 5) { + src_mfmt = memory::format::ncdhw; + } else { + CHECK_LE(tensor_size, 4) + << "mkldnn batch normalization layer only supports dim size <= 5!"; + if (tensor_size < 4) dim.resize(4, 1); // extend to nchw with dim 1 to match mkldnn format + src_mfmt = memory::format::nchw; + } unsigned flags = 0; if (use_weight_bias_) flags |= use_scale_shift; @@ -475,16 +489,16 @@ void MKLDNNBatchNormLayer::InitBatchNormBwd( usr_diff_mpd = mem_descr->usr_memory_pd(); prv_diff_mpd = mem_descr->prv_memory_pd(); } else { - top_diff_md.reset(new memory::desc({{n, c, h, w}}, mpcsn, memory::format::nchw)); //MKLDNN batch norm only support 4D memory descriptor! + top_diff_md.reset(new memory::desc({dim}, mpcsn, src_mfmt)); //MKLDNN batch norm only support 4D memory descriptor! usr_diff_mpd.reset(new memory::primitive_desc(*top_diff_md, cpu_engine)); } top_diff_stats_md.reset(new memory::desc(*top_diff_md)); CHECK(top_diff_stats_md->data.ndims > 0 && - top_diff_stats_md->data.dims[0] == this->num_); + top_diff_stats_md->data.dims[0] == this->shape_[0]); top_diff_stats_md->data.dims[0] = stats_batch_size_; output_stats_md.reset(new memory::desc(output_memory->get_primitive_desc().desc())); CHECK(output_stats_md->data.ndims > 0 && - output_stats_md->data.dims[0] == this->num_); + output_stats_md->data.dims[0] == this->shape_[0]); output_stats_md->data.dims[0] = stats_batch_size_; // ---- Initialize bnrm primitive descriptor ------------- diff --git a/src/caffe/layers/mkldnn_concat_layer.cpp b/src/caffe/layers/mkldnn_concat_layer.cpp index 9299f9cea..6143f57ae 100644 --- a/src/caffe/layers/mkldnn_concat_layer.cpp +++ b/src/caffe/layers/mkldnn_concat_layer.cpp @@ -73,96 +73,33 @@ void MKLDNNConcatLayer::LayerSetUp(const vector*>& bottom, concat_dimension = bottom[0]->CanonicalAxisIndex(concat_param.axis()); } + vector bottom_0_shape = bottom[0]->shape(); + bottom_0_shape[concat_dimension] = 0; for (auto i = 1; i < num_concats_; ++i) { - if (concat_dimension == 0) - { - CHECK_EQ(bottom[0]->channels(), bottom[i]->channels()); - CHECK_EQ(bottom[0]->height(), bottom[i]->height()); - CHECK_EQ(bottom[0]->width(), bottom[i]->width()); - break; - } - else if (concat_dimension == 1) - { - CHECK_EQ(bottom[0]->num(), bottom[i]->num()); - if (!concat_param.per_fla_fuse()){ - CHECK_EQ(bottom[0]->height(), bottom[i]->height()); - CHECK_EQ(bottom[0]->width(), bottom[i]->width()); - } - break; - } - else if (concat_dimension == 2) - { - CHECK_EQ(bottom[0]->num(), bottom[i]->num()); - CHECK_EQ(bottom[0]->channels(), bottom[i]->channels()); - CHECK_EQ(bottom[0]->width(), bottom[i]->width()); - break; - } - else if (concat_dimension == 3) - { - CHECK_EQ(bottom[0]->num(), bottom[i]->num()); - CHECK_EQ(bottom[0]->channels(), bottom[i]->channels()); - CHECK_EQ(bottom[0]->height(), bottom[i]->height()); - break; + vector bottom_i_shape = bottom[i]->shape(); + bottom_i_shape[concat_dimension] = 0; + if (concat_dimension == 1 && concat_param.per_fla_fuse()) { + for(int i = 0; i < concat_dimension + 1; i++) CHECK_EQ(bottom_i_shape[i] == bottom_0_shape[i], true); + continue; } + CHECK_EQ(bottom_0_shape == bottom_i_shape, true); } split_dims.reserve(num_concats_); - if (concat_dimension == 0) - { - num_ = 0; - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); + shape_ = bottom[0]->shape(); + shape_[concat_dimension] = 0; + if (concat_dimension == 1 && concat_param.per_fla_fuse()) { + for(int i= concat_dimension + 1; i < shape_.size(); i++) shape_[i] = 1; for (auto i = 0; i < num_concats_; ++i) { CHECK_EQ(dim_src, bottom[i]->shape().size()); - split_dims[i] = bottom[i]->num(); - num_ += split_dims[i]; + split_dims[i] = bottom[i]->count(concat_dimension); + shape_[concat_dimension] += split_dims[i]; } - } - else if (concat_dimension == 1) - { - num_ = bottom[0]->num(); - channels_ = 0; - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); - if (concat_param.per_fla_fuse()){ - height_ = 1; - width_ = 1; - for (auto i = 0; i < num_concats_; ++i) { - CHECK_EQ(dim_src, bottom[i]->shape().size()); - split_dims[i] = bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); - channels_ += split_dims[i]; - } - } else{ - for (auto i = 0; i < num_concats_; ++i) { - CHECK_EQ(dim_src, bottom[i]->shape().size()); - split_dims[i] = bottom[i]->channels(); - channels_ += split_dims[i]; - } - } - } - else if (concat_dimension == 2) - { - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = 0; - width_ = bottom[0]->width(); - for (auto i = 0; i < num_concats_; ++i) { - CHECK_EQ(dim_src, bottom[i]->shape().size()); - split_dims[i] = bottom[i]->height(); - height_ += split_dims[i]; - } - } - else if (concat_dimension == 3) - { - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = 0; + } else { for (auto i = 0; i < num_concats_; ++i) { CHECK_EQ(dim_src, bottom[i]->shape().size()); - split_dims[i] = bottom[i]->width(); - width_ += split_dims[i]; + split_dims[i] = bottom[i]->shape(concat_dimension); + shape_[concat_dimension] += split_dims[i]; } } } @@ -172,100 +109,31 @@ void MKLDNNConcatLayer::Reshape(const vector*>& bottom, const vector*>& top) { VLOG(1) << "MKLDNNConcatLayer::Reshape: " << this->layer_param_.name(); const ConcatParameter& concat_param = this->layer_param_.concat_param(); - if (concat_dimension == 0) - { - //Need to re-calculate the shape duo to the change of batch size - num_ = 0; - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); - //Also need to reshape the concat dim, in case the concat dim is just be reshaped by batch size - for (auto i = 0; i < num_concats_; ++i) { - split_dims[i] = bottom[i]->num(); - num_ += split_dims[i]; - } - if (this->channels_ == bottom[0]->channels() && - this->height_ == bottom[0]->height() && - this->width_ == bottom[0]->width()) { - this->reshape = false; - } else { - this->reshape = true; - } - } - else if (concat_dimension == 1) - { - num_ = bottom[0]->num(); - channels_ = 0; - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); - if (concat_param.per_fla_fuse()){ - height_ = 1; - width_ = 1; - for (auto i = 0; i < num_concats_; ++i) { - split_dims[i] = bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); - channels_ += split_dims[i]; - } - if (this->num_ == bottom[0]->num()) { - this->reshape = false; - } else { - this->reshape = true; - } + vector dim = bottom[0]->shape(); + int dim_src = bottom[0]->shape().size(); - } else{ - for (auto i = 0; i < num_concats_; ++i) { - split_dims[i] = bottom[i]->channels(); - channels_ += split_dims[i]; - } - if (this->num_ == bottom[0]->num() && - this->height_ == bottom[0]->height() && - this->width_ == bottom[0]->width()) { - this->reshape = false; - } else { - this->reshape = true; - } - } - } - else if (concat_dimension == 2) - { - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = 0; - width_ = bottom[0]->width(); + split_dims.clear(); + dim[concat_dimension] = 0; + if (concat_dimension == 1 && concat_param.per_fla_fuse()) { + for(int i= concat_dimension + 1; i < shape_.size(); i++) dim[i] = 1; for (auto i = 0; i < num_concats_; ++i) { - split_dims[i] = bottom[i]->height(); - height_ += split_dims[i]; - } - - if (this->num_ == bottom[0]->num() && - this->channels_ == bottom[0]->channels() && - this->width_ == bottom[0]->width()) { - this->reshape = false; - } else { - this->reshape = true; + CHECK_EQ(dim_src, bottom[i]->shape().size()); + split_dims[i] = bottom[i]->count(concat_dimension); + dim[concat_dimension] += split_dims[i]; } - } - else if (concat_dimension == 3) - { - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = 0; + } else { for (auto i = 0; i < num_concats_; ++i) { - split_dims[i] = bottom[i]->width(); - width_ += split_dims[i]; + CHECK_EQ(dim_src, bottom[i]->shape().size()); + split_dims[i] = bottom[i]->shape(concat_dimension); + dim[concat_dimension] += split_dims[i]; } + } - if (this->num_ == bottom[0]->num() && - this->channels_ == bottom[0]->channels() && - this->height_ == bottom[0]->height()) { - this->reshape = false; - } else { - this->reshape = true; - } - } + this->reshape = (dim != shape_); + shape_ = dim; - top[0]->Reshape(num_, channels_, height_, width_); + top[0]->Reshape(shape_); } template @@ -273,37 +141,20 @@ void MKLDNNConcatLayer::InitConcatFwd(const vector*>& bottom, const vector*>& top) { if (std::is_same::value) NOT_IMPLEMENTED; - //Fix: MKLDNN concat layer should use 4D blob as input! Reshape the 2D input blob into 4D for calculation! - bool has_spatial = (bottom[0]->shape().size() != 2); -#ifdef DEBUG - LOG(INFO) << "has_spatial flag value: " << has_spatial; -#endif - if (has_spatial == false) - { -#ifdef DEBUG - LOG(INFO) << "size of bottom blob: " << bottom[0]->shape().size(); - LOG(INFO) << "size of top blob: " << top[0]->shape().size(); - LOG(INFO) << "MKLDNN concat layer only support 4D blob as input! Reshape the 2D input blob into 4D for calculation!"; -#endif - for (auto i = 0; i < num_concats_; i++) - { - vector bottom_4D_shape; - int bottom_4D_height = 1; - int bottom_4D_width = 1; - bottom_4D_shape.push_back(bottom[i]->num()); - bottom_4D_shape.push_back(bottom[i]->channels()); - bottom_4D_shape.push_back(bottom_4D_height); - bottom_4D_shape.push_back(bottom_4D_width); - bottom[i]->Reshape(bottom_4D_shape, false); - } - } engine cpu_engine = CpuEngine::Instance().get_engine(); memory::data_type usr_dt = memory::data_type::f32; memory::data_type prv_dt = usr_dt; // memory::format mfmt_any = memory::format::any; - memory::format mfmt_nchw = memory::format::nchw; + memory::format mfmt_out; + if(this->shape_.size() == 5) { + mfmt_out = memory::format::ncdhw; + } else { + CHECK_LE(this->shape_.size(), 4) << "mkldnn concat layer doesn't support this dim size!"; + mfmt_out = memory::format::nchw; + } - memory::dims output_tz = {num_, channels_, height_, width_}; + memory::dims output_tz = this->shape_; + if(output_tz.size() < 4) output_tz.resize(4, 1); // resize to nchw with dim 1 std::vector srcs_mpd; std::vector srcs; fwd_bottom_data.clear(); @@ -345,28 +196,14 @@ void MKLDNNConcatLayer::InitConcatFwd(const vector*>& bottom, fwd_bottom_data.push_back(boost::shared_ptr >()); mem_descr.push_back(boost::shared_ptr>()); - memory::dims input_tz = {0, 0, 0, 0}; - if (concat_dimension == 0) - { - input_tz = {split_dims[i], channels_, height_, width_}; - } - else if (concat_dimension == 1) - { - input_tz = {num_, split_dims[i], height_, width_}; - } - else if (concat_dimension == 2) - { - input_tz = {num_, channels_, split_dims[i], width_}; - } - else if (concat_dimension == 3) - { - input_tz = {num_, channels_, height_, split_dims[i]}; - } + memory::dims input_tz = this->shape_; + if(input_tz.size() < 4) input_tz.resize(4, 1); // resize to nchw with dim 1 + input_tz[concat_dimension] = split_dims[i]; - memory::format src_mfmt = mfmt_nchw; + memory::format src_mfmt = mfmt_out; shared_ptr prv_src_mpd; shared_ptr usr_src_mpd( - new memory::primitive_desc({input_tz, usr_dt, mfmt_nchw}, cpu_engine)); + new memory::primitive_desc({input_tz, usr_dt, mfmt_out}, cpu_engine)); if (const_cast(bottom[i]->prv_data()) != NULL) { scale = 1.; @@ -396,7 +233,7 @@ void MKLDNNConcatLayer::InitConcatFwd(const vector*>& bottom, } shared_ptr usr_dst_mpd(new memory::primitive_desc( - {output_tz, usr_dt, mfmt_nchw}, cpu_engine)); + {output_tz, usr_dt, mfmt_out}, cpu_engine)); concatFwd_pd.reset(new concat::primitive_desc(concat_dimension, srcs_mpd)); @@ -414,7 +251,7 @@ void MKLDNNConcatLayer::InitConcatFwd(const vector*>& bottom, fwd_output_memory = fwd_top_data->create_output_memory(); - memory::format base_mfmt = mfmt_nchw; + memory::format base_mfmt = mfmt_out; float base_scale = 1.; this->in_place_ = true; @@ -423,7 +260,7 @@ void MKLDNNConcatLayer::InitConcatFwd(const vector*>& bottom, base_mfmt = src_mfmts[i]; base_scale = bottom_scales[i]; } - else if((concat_dimension != 0 && bottom[i]->shape()[concat_dimension - 1] != 1) || base_mfmt != src_mfmts[i] || fabs(base_scale-bottom_scales[i]) > FLT_MIN || different_input_dt) { + else if((concat_dimension != 0 && bottom[i]->shape()[concat_dimension - 1] != 1) || base_mfmt != src_mfmts[i] || fabs(base_scale-bottom_scales[i]) > FLT_MIN || different_input_dt || bottom[i]->prv_data() == NULL) { this->in_place_ = false; break; } @@ -468,15 +305,24 @@ void MKLDNNConcatLayer::InitConcatBwd(const vector*>& top, engine cpu_engine = CpuEngine::Instance().get_engine(); memory::data_type data_type = memory::data_type::f32; // memory::format mfmt_any = memory::format::any; - memory::format mfmt_nchw = memory::format::nchw; - memory::format diff_dst_mfmt = mfmt_nchw; + memory::format mfmt_out; + memory::dims offsets; + if(this->shape_.size() == 5) { + mfmt_out = memory::format::ncdhw; + offsets = {0, 0, 0, 0, 0}; + } else { + mfmt_out = memory::format::nchw; + offsets = {0, 0, 0, 0}; + } + + memory::format diff_dst_mfmt = mfmt_out; - memory::dims input_tz = {num_, channels_, height_, width_}; - memory::dims offsets = {0, 0, 0, 0}; + memory::dims input_tz = this->shape_; + if (input_tz.size() < 4) input_tz.resize(4, 1); shared_ptr prv_diff_dst_mpd; shared_ptr usr_diff_dst_mpd( - new memory::primitive_desc({input_tz, data_type, mfmt_nchw}, + new memory::primitive_desc({input_tz, data_type, mfmt_out}, cpu_engine)); bool top_diff_is_prv = (const_cast(top[0]->prv_diff()) != NULL); @@ -502,26 +348,12 @@ void MKLDNNConcatLayer::InitConcatBwd(const vector*>& top, bwd_bottom_diff.push_back(boost::shared_ptr >()); reorders.push_back(MKLDNNPrimitive()); - memory::dims dims = {0, 0, 0, 0}; - if (concat_dimension == 0) - { - dims = {split_dims[i], channels_, height_, width_}; - } - else if (concat_dimension == 1) - { - dims = {num_, split_dims[i], height_, width_}; - } - else if (concat_dimension == 2) - { - dims = {num_, channels_, split_dims[i], width_}; - } - else if (concat_dimension == 3) - { - dims = {num_, channels_, height_, split_dims[i]}; - } + memory::dims dims = this->shape_; + if (dims.size() < 4) dims.resize(4, 1); + dims[concat_dimension] = split_dims[i]; shared_ptr usr_diff_src_mpd( - new memory::primitive_desc({dims, data_type, mfmt_nchw}, + new memory::primitive_desc({dims, data_type, mfmt_out}, cpu_engine)); shared_ptr prv_diff_src_mpd( new memory::primitive_desc({dims, data_type, diff_dst_mfmt}, @@ -587,8 +419,24 @@ void MKLDNNConcatLayer::Backward_cpu(const vector*>& top LOG(INFO) << "MKLDNNConcatLayer::Backward_cpu: " << this->layer_param_.name(); #endif - if ((reorders.size() == 0) || (true == this->reshape)) + if ((reorders.size() == 0) || (true == this->reshape)) { + bool concat_axis = true; + for (auto i = 0; i < num_concats_; i++) { + if (bottom[i]->shape()[concat_dimension] % 16 != 0) { + concat_axis = false; + break; + } + } + // mkldnn view primitive creation has restriction if viewed area (offset or size) is not + // aligned on block size (would assert if such case happens). + // This case usually would be triggered when the bottom blob's dim along concat axis is + // not 16-dividable and the top blob's block size becomes 16 due to the following mkldnn + // layer's reorder. + // To avoid such assertion, we have to explicitly convert the top blob state from prv to cpu state. + if (top[0]->prv_diff() != NULL && !concat_axis) + top[0]->mutable_cpu_diff(); InitConcatBwd(top, propagate_down, bottom); + } bwd_top_diff->sync_before_read(); for (auto i = 0; i < num_concats_; ++i) { bwd_bottom_diff[i]->sync_before_write(); diff --git a/src/caffe/layers/mkldnn_eltwise_layer.cpp b/src/caffe/layers/mkldnn_eltwise_layer.cpp index aed8db34f..b67ce4052 100644 --- a/src/caffe/layers/mkldnn_eltwise_layer.cpp +++ b/src/caffe/layers/mkldnn_eltwise_layer.cpp @@ -78,15 +78,10 @@ template void MKLDNNEltwiseLayer::Reshape(const vector*>& bottom, const vector*>& top) { VLOG(1) << "MKLDNNEltwiseLayer::Reshape: " << this->layer_param_.name(); - this->reshape = (this->width_ == bottom[0]->width() && - this->height_ == bottom[0]->height() && - this->channels_ == bottom[0]->channels() && - this->num_ == bottom[0]->num()) ? false : true; - - this->width_ = bottom[0]->width(); - this->height_ = bottom[0]->height(); - this->num_ = bottom[0]->num(); - this->channels_ = bottom[0]->channels(); + this->reshape = (this->shape_ == bottom[0]->shape()) ? false : true; + this->shape_ = bottom[0]->shape(); + CHECK_LE(this->shape_.size(), 5) + << "Tensor dimension must be less than 6"; switch (op_) { @@ -129,11 +124,6 @@ void MKLDNNEltwiseLayer::InitEltwiseFwd(const vector*>& botto { if (std::is_same::value) NOT_IMPLEMENTED; - int32_t n = this->num_; - int32_t iw = this->width_; - int32_t ih = this->height_; - int32_t ic = this->channels_; - // If we just do simple adding, scale is 1.0 for all inputs we have std::vector scale(num_bottoms_, 1.0); //Eltwise layer is supporting multiplication coefficient and this scale value can be used for that. @@ -144,7 +134,17 @@ void MKLDNNEltwiseLayer::InitEltwiseFwd(const vector*>& botto engine cpu_engine = CpuEngine::Instance().get_engine(); memory::data_type mpcsn = memory::data_type::f32; - memory::format mfmt_nchw = memory::format::nchw; + + memory::format src_mfmt; + auto tensor_size = this->shape_.size(); + memory::dims dim = this->shape_; + if(tensor_size == 5) { + src_mfmt = memory::format::ncdhw; + } else { + CHECK_LE(tensor_size, 4) << "The mkldnn eltwise layer only supports dim size <= 5!"; + if (tensor_size < 4) dim.resize(4, 1); + src_mfmt = memory::format::nchw; + } // ---- Initialize memory descriptors ------------- std::vector prv_dt(num_bottoms_, memory::data_type::f32); @@ -177,10 +177,10 @@ void MKLDNNEltwiseLayer::InitEltwiseFwd(const vector*>& botto for (auto i = 0; i < num_bottoms_; i++) { fwd_bottom_data.push_back(boost::shared_ptr >()); - memory::format bottom_data_mfmt = mfmt_nchw; + memory::format bottom_data_mfmt = src_mfmt; shared_ptr prv_bottom_data_mpd; shared_ptr usr_bottom_data_mpd( - new memory::primitive_desc({{n, ic, ih, iw}, mpcsn, mfmt_nchw}, cpu_engine)); + new memory::primitive_desc({dim, mpcsn, src_mfmt}, cpu_engine)); bool bottom_data_is_prv = (const_cast(bottom[i]->prv_data()) != NULL); if (bottom_data_is_prv) @@ -194,11 +194,11 @@ void MKLDNNEltwiseLayer::InitEltwiseFwd(const vector*>& botto mem_descr->prv_memory_pd()->desc().data.data_type); } prv_bottom_data_mpd.reset(new memory::primitive_desc( - {{n, ic, ih, iw}, bottom_data_dt, bottom_data_mfmt}, cpu_engine)); + {dim, bottom_data_dt, bottom_data_mfmt}, cpu_engine)); } bottom_data_mpd.push_back(memory::primitive_desc( - {{n, ic, ih, iw}, bottom_data_dt, bottom_data_mfmt}, cpu_engine)); + {dim, bottom_data_dt, bottom_data_mfmt}, cpu_engine)); fwd_bottom_data[i].reset(new MKLDNNData( usr_bottom_data_mpd, prv_bottom_data_mpd, bottom[i], this)); @@ -208,13 +208,13 @@ void MKLDNNEltwiseLayer::InitEltwiseFwd(const vector*>& botto } shared_ptr usr_top_data_mpd(new memory::primitive_desc( - {{n, ic, ih, iw}, mpcsn, mfmt_nchw}, cpu_engine)); + {dim, mpcsn, src_mfmt}, cpu_engine)); // ---- Determining engine to use ----------------------- std::string subengines = this->layer_param_.engine(); if (subengines.find("MKLDNN") == std::string::npos || subengines == "MKLDNN") subengines = "MKLDNN:CPU"; - eltwiseFwd_pd.reset(new sum::primitive_desc({{n, ic, ih, iw}, bottom_data_dt, memory::format::any}, scale, bottom_data_mpd)); + eltwiseFwd_pd.reset(new sum::primitive_desc({dim, bottom_data_dt, memory::format::any}, scale, bottom_data_mpd)); CHECK(eltwiseFwd_pd); shared_ptr prv_top_data_mpd(new memory::primitive_desc(eltwiseFwd_pd->dst_primitive_desc())); diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 4c5f83456..78e5125c6 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -60,14 +60,12 @@ void MKLDNNReLULayer::Reshape(const vector*>& bottom NeuronLayer::Reshape(bottom, top); - this->reshape = (this->width_ == bottom[0]->width() && - this->height_ == bottom[0]->height() && - this->channels_ == bottom[0]->channels() && - this->num_ == bottom[0]->num()) ? false : true; - this->width_ = bottom[0]->width(); - this->height_ = bottom[0]->height(); - this->num_ = bottom[0]->num(); - this->channels_ = bottom[0]->channels(); + this->reshape = (this->shape_ == bottom[0]->shape()) ? false : true; + + this->shape_ = bottom[0]->shape(); + + CHECK_LE(this->shape_.size(), 5) + << "Tensor dimension must be less than 6."; } @@ -76,10 +74,6 @@ void MKLDNNReLULayer::InitReLUFwd(const vector*>& bottom, con { if (std::is_same::value) NOT_IMPLEMENTED; auto propagation = this->phase_ == TEST ? prop_kind::forward_scoring : prop_kind::forward_training; - int32_t n = this->num_; - int32_t iw = this->width_; - int32_t ih = this->height_; - int32_t ic = this->channels_; Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); @@ -92,7 +86,18 @@ void MKLDNNReLULayer::InitReLUFwd(const vector*>& bottom, con shared_ptr usr_data_mpd(NULL), prv_data_mpd(NULL), top_data_mpd(NULL); memory::data_type src_dt = memory::data_type::f32; memory::data_type top_dt = memory::data_type::f32; - memory::format src_mfmt = memory::format::nchw; + + memory::format src_mfmt; + auto tensor_size = this->shape_.size(); + memory::dims dim = this->shape_; + if(tensor_size == 5) { + src_mfmt = memory::format::ncdhw; + } else { + CHECK_LE(tensor_size, 4) << "The mkldnn relu layer only supports dim size <= 5!"; + if (tensor_size < 4) dim.resize(4, 1); // extend to nchw with dim 1 to match mkldnn format + src_mfmt = memory::format::nchw; + } + //bottom_data_is_prv = false; std::vector scale; if (bottom_data_is_prv) { @@ -105,13 +110,13 @@ void MKLDNNReLULayer::InitReLUFwd(const vector*>& bottom, con src_dt = static_cast(mem_descr->prv_memory_pd()->desc().data.data_type); src_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); } else { - bottom_data_md.reset(new memory::desc({{n, ic, ih, iw}}, mpcsn, memory::format::nchw)); + bottom_data_md.reset(new memory::desc({dim}, mpcsn, src_mfmt)); usr_data_mpd.reset(new memory::primitive_desc(*bottom_data_md, cpu_engine)); prv_data_mpd.reset(new memory::primitive_desc(*bottom_data_md, cpu_engine)); scale.push_back(1.); } top_dt = src_dt; - top_data_mpd.reset(new memory::primitive_desc({{n,ic,ih,iw}, top_dt, src_mfmt}, cpu_engine)); + top_data_mpd.reset(new memory::primitive_desc({dim, top_dt, src_mfmt}, cpu_engine)); // ---- Initialize relu primitive descriptor ------------- //relu_forward::desc reluFwd_desc(propagation, *bottom_data_md, negative_slope); @@ -195,11 +200,6 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top { if (std::is_same::value) NOT_IMPLEMENTED; - int32_t n = this->num_; - int32_t iw = this->width_; - int32_t ih = this->height_; - int32_t ic = this->channels_; - Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); bool top_diff_is_prv = top[0]->prv_diff() != NULL; bool inplace = (bottom[0] == top[0]); @@ -220,13 +220,24 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top // ---- Initialize memory descriptors ------------- shared_ptr bottom_data_md; shared_ptr usr_data_mpd(NULL), prv_data_mpd(NULL); + + memory::format mfmt; + auto tensor_size = this->shape_.size(); + memory::dims dim = this->shape_; + if(tensor_size == 5) { + mfmt = memory::format::ncdhw; + } else { + CHECK_LE(tensor_size, 4) << "The mkldnn relu layer only supports dim size <= 5!"; + if (tensor_size < 4) dim.resize(4, 1); // extend to nchw with dim 1 to match mkldnn format + mfmt = memory::format::nchw; + } if (bottom_data_is_prv) { shared_ptr > mem_descr = get_mkldnn_prv_descriptor(bottom[0]); usr_data_mpd = mem_descr->usr_memory_pd(); prv_data_mpd = mem_descr->prv_memory_pd(); } else { - bottom_data_md.reset(new memory::desc({{n, ic, ih, iw}}, mpcsn, memory::format::nchw)); + bottom_data_md.reset(new memory::desc(dim, mpcsn, mfmt)); usr_data_mpd.reset(new memory::primitive_desc(*bottom_data_md, cpu_engine)); } @@ -276,7 +287,7 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top top[0]->set_prv_diff_descriptor(NULL); } - top_diff_md.reset(new memory::desc({{n, ic, ih, iw}}, mpcsn, memory::format::nchw)); + top_diff_md.reset(new memory::desc(dim, mpcsn, mfmt)); usr_diff_mpd.reset(new memory::primitive_desc(*top_diff_md, cpu_engine)); } diff --git a/src/caffe/layers/mvn_layer.cpp b/src/caffe/layers/mvn_layer.cpp index 09542b105..3f9f4804d 100644 --- a/src/caffe/layers/mvn_layer.cpp +++ b/src/caffe/layers/mvn_layer.cpp @@ -45,19 +45,21 @@ namespace caffe { template void MVNLayer::Reshape(const vector*>& bottom, const vector*>& top) { - top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - mean_.Reshape(bottom[0]->num(), bottom[0]->channels(), - 1, 1); - variance_.Reshape(bottom[0]->num(), bottom[0]->channels(), - 1, 1); - temp_.Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); + top[0]->Reshape(bottom[0]->shape()); + vector temp_shape = bottom[0]->shape(); + for (int i = 2; i < temp_shape.size(); i++) + temp_shape[i] = 1; + mean_.Reshape(temp_shape); + variance_.Reshape(temp_shape); + temp_.Reshape(bottom[0]->shape()); + + vector shape = bottom[0]->shape(); + shape[0] = 1; if ( this->layer_param_.mvn_param().across_channels() ) { - sum_multiplier_.Reshape(1, bottom[0]->channels(), bottom[0]->height(), - bottom[0]->width()); + sum_multiplier_.Reshape(shape); } else { - sum_multiplier_.Reshape(1, 1, bottom[0]->height(), bottom[0]->width()); + shape[1] = 1; + sum_multiplier_.Reshape(shape); } Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data); @@ -71,9 +73,9 @@ void MVNLayer::Forward_cpu(const vector*>& bottom, Dtype* top_data = top[0]->mutable_cpu_data(); int num; if (this->layer_param_.mvn_param().across_channels()) - num = bottom[0]->num(); + num = bottom[0]->shape(0); else - num = bottom[0]->num() * bottom[0]->channels(); + num = bottom[0]->shape(0) * bottom[0]->shape(1); int dim = bottom[0]->count() / num; @@ -118,9 +120,9 @@ void MVNLayer::Backward_cpu(const vector*>& top, int num; if (this->layer_param_.mvn_param().across_channels()) - num = bottom[0]->num(); + num = bottom[0]->shape(0); else - num = bottom[0]->num() * bottom[0]->channels(); + num = bottom[0]->shape(0) * bottom[0]->shape(1); int dim = bottom[0]->count() / num; diff --git a/src/caffe/layers/mvn_layer.cu b/src/caffe/layers/mvn_layer.cu index 739293be0..1df728d08 100644 --- a/src/caffe/layers/mvn_layer.cu +++ b/src/caffe/layers/mvn_layer.cu @@ -12,9 +12,9 @@ void MVNLayer::Forward_gpu(const vector*>& bottom, Dtype* top_data = top[0]->mutable_gpu_data(); int num; if (this->layer_param_.mvn_param().across_channels()) - num = bottom[0]->num(); + num = bottom[0]->shape(0); else - num = bottom[0]->num() * bottom[0]->channels(); + num = bottom[0]->shape(0) * bottom[0]->shape(1); int dim = bottom[0]->count() / num; @@ -60,9 +60,9 @@ void MVNLayer::Backward_gpu(const vector*>& top, int num; if (this->layer_param_.mvn_param().across_channels()) - num = bottom[0]->num(); + num = bottom[0]->shape(0); else - num = bottom[0]->num() * bottom[0]->channels(); + num = bottom[0]->shape(0) * bottom[0]->shape(1); int dim = bottom[0]->count() / num; diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index 43f07cbf8..90b8e67c2 100755 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -203,9 +203,9 @@ inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, const int* kernel_shape, const int* pad, const int* stride, const int* dilation, Dtype* data_output) { if (!im2col) { - int im_size = im_shape[0]; + size_t im_size = im_shape[0]; for (int i = 0; i < num_spatial_axes; ++i) { - im_size *= im_shape[1 + i]; + im_size *= (size_t)im_shape[1 + i]; } caffe_set(im_size, Dtype(0), data_output); } @@ -333,7 +333,7 @@ void col2im_cpu(const Dtype* data_col, const int channels, int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1; long chunk_len = kernel_h * kernel_w; - caffe_set(height * width * channels, Dtype(0), data_im); + caffe_set((size_t)height * (size_t)width * (size_t)channels, Dtype(0), data_im); #ifdef _OPENMP #pragma omp parallel for if (channels > 1) @@ -378,7 +378,7 @@ void col2im3d_cpu(const Dtype* data_col, const int channels, long height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1; long width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1; long depth_col = (depth + 2 * pad_d - dil_patch_d) / stride_d + 1; - long num_kernels = channels * height * width * depth; + long num_kernels = (size_t)channels * (size_t)height * (size_t)width * (size_t)depth; long chunk_len = kernel_h * kernel_w * kernel_d; caffe_set(num_kernels, Dtype(0), data_im);