diff --git a/LICENSE b/LICENSE index 7f8d0ca57..3316218f5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,4 @@ + COPYRIGHT All modification made by Intel Corporation: © 2017 Intel Corporation. diff --git a/Makefile.mkldnn b/Makefile.mkldnn index 797990d77..51f7fcab6 100644 --- a/Makefile.mkldnn +++ b/Makefile.mkldnn @@ -8,6 +8,10 @@ MKLDNN_COMMIT := `cat ${CAFFE_ROOTDIR}/mkldnn.commit` MKLDNN_CXX := $(CXX) MKLDNN_CC := $(CC) +RETURN_STRING=$(shell ./external/mkl/prepare_mkl.sh) +MKLROOT=$(firstword $(RETURN_STRING)) +MKL_ROOTDIR := $(MKLROOT) + # We do this because earlier versions of CMake have problems with ccache ifneq (,$(findstring ccache,$(CXX))) MKLDNN_CXX := $(lastword $(CXX)) @@ -18,7 +22,7 @@ ifneq (,$(findstring ccache,$(CC))) endif MKLDNN_GITHUB := https://github.com/01org/mkl-dnn.git -MKLDNN_CMAKE_FLAGS += $(MKLDNN_SRCDIR) -DCMAKE_INSTALL_PREFIX=$(CAFFE_ROOTDIR)/$(MKLDNN_INSTALLDIR) -B$(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) -DCMAKE_CXX_COMPILER="$(MKLDNN_CXX)" -DCMAKE_C_COMPILER="$(MKLDNN_CC)" +MKLDNN_CMAKE_FLAGS += $(MKLDNN_SRCDIR) -DCMAKE_INSTALL_PREFIX=$(CAFFE_ROOTDIR)/$(MKLDNN_INSTALLDIR) -DMKLROOT=${MKL_ROOTDIR} -B$(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) -DCMAKE_CXX_COMPILER="$(MKLDNN_CXX)" -DCMAKE_C_COMPILER="$(MKLDNN_CC)" ifeq ("$(wildcard $(MKLDNN_INSTALLDIR)/include/mkldnn.hpp)", "") mkldnn_download: diff --git a/README.md b/README.md index dab45a99c..dbc117df6 100644 --- a/README.md +++ b/README.md @@ -71,3 +71,6 @@ Please cite Caffe in your publications if it helps your research: *** *Other names and brands may be claimed as the property of others + + + diff --git a/external/mkl/prepare_mkl.sh b/external/mkl/prepare_mkl.sh index dc0eb2ecc..b68bc7aec 100755 --- a/external/mkl/prepare_mkl.sh +++ b/external/mkl/prepare_mkl.sh @@ -74,10 +74,11 @@ echo $VERSION_LINE # Return Version Line # MKL DST=`dirname $0` OMP=0 -VERSION_MATCH=20170101 -ARCHIVE_BASENAME=mklml_lnx_2017.0.2.20170110.tgz +VERSION_MATCH=20170425 +ARCHIVE_BASENAME=mklml_lnx_2018.0.20170425.tgz MKL_CONTENT_DIR=`echo $ARCHIVE_BASENAME | rev | cut -d "." -f 2- | rev` -GITHUB_RELEASE_TAG=self_containted_MKLGOLD_u2 +GITHUB_RELEASE_TAG=1.0.0 + MKLURL="https://github.com/intel/caffe/releases/download/$GITHUB_RELEASE_TAG/$ARCHIVE_BASENAME" # there are diffrent MKL lib to be used for GCC and for ICC reg='^[0-9]+$' diff --git a/include/caffe/layers/accuracy_layer.hpp b/include/caffe/layers/accuracy_layer.hpp index d8b4d34e5..c61255bd3 100644 --- a/include/caffe/layers/accuracy_layer.hpp +++ b/include/caffe/layers/accuracy_layer.hpp @@ -76,7 +76,7 @@ class AccuracyLayer : public Layer { // If there are two top blobs, then the second blob will contain // accuracies per class. virtual inline int MinTopBlobs() const { return 1; } - virtual inline int MaxTopBlos() const { return 2; } + virtual inline int MaxTopBlobs() const { return 2; } protected: /** diff --git a/include/caffe/layers/base_conv_layer.hpp b/include/caffe/layers/base_conv_layer.hpp old mode 100644 new mode 100755 index 8b3bc99ba..11236681f --- a/include/caffe/layers/base_conv_layer.hpp +++ b/include/caffe/layers/base_conv_layer.hpp @@ -153,6 +153,13 @@ class BaseConvolutionLayer : public Layer { pad_.cpu_data()[0], pad_.cpu_data()[1], stride_.cpu_data()[0], stride_.cpu_data()[1], dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff); + } else if (!force_nd_im2col_ && num_spatial_axes_ == 3) { + im3d2col_cpu(data, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], conv_input_shape_.cpu_data()[3], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], kernel_shape_.cpu_data()[2], + pad_.cpu_data()[0], pad_.cpu_data()[1], pad_.cpu_data()[2], + stride_.cpu_data()[0], stride_.cpu_data()[1], stride_.cpu_data()[2], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], dilation_.cpu_data()[2], col_buff); } else { im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(), col_buffer_shape_.data(), kernel_shape_.cpu_data(), @@ -167,6 +174,13 @@ class BaseConvolutionLayer : public Layer { pad_.cpu_data()[0], pad_.cpu_data()[1], stride_.cpu_data()[0], stride_.cpu_data()[1], dilation_.cpu_data()[0], dilation_.cpu_data()[1], data); + } else if (!force_nd_im2col_ && num_spatial_axes_ == 3) { + col2im3d_cpu(col_buff, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], conv_input_shape_.cpu_data()[3], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], kernel_shape_.cpu_data()[2], + pad_.cpu_data()[0], pad_.cpu_data()[1], pad_.cpu_data()[2], + stride_.cpu_data()[0], stride_.cpu_data()[1], stride_.cpu_data()[2], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], dilation_.cpu_data()[2], data); } else { col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(), col_buffer_shape_.data(), kernel_shape_.cpu_data(), diff --git a/include/caffe/layers/mkldnn_layers.hpp b/include/caffe/layers/mkldnn_layers.hpp index 5ef3e77dd..391235d4d 100644 --- a/include/caffe/layers/mkldnn_layers.hpp +++ b/include/caffe/layers/mkldnn_layers.hpp @@ -1,510 +1,519 @@ -/* -All modification made by Intel Corporation: © 2016 Intel Corporation - -All contributions by the University of California: -Copyright (c) 2014, 2015, The Regents of the University of California (Regents) -All rights reserved. - -All other contributions: -Copyright (c) 2014, 2015, the respective contributors -All rights reserved. -For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md - - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CAFFE_MKLDNN_LAYERS_HPP_ -#define CAFFE_MKLDNN_LAYERS_HPP_ - -#include -#include - -#include "boost/enable_shared_from_this.hpp" -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/engine_parser.hpp" -#include "caffe/layers/base_conv_layer.hpp" -#include "caffe/layers/conv_layer.hpp" -#include "caffe/layers/inner_product_layer.hpp" -#include "caffe/layers/neuron_layer.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/mkldnn_memory.hpp" -#include "mkldnn.hpp" - -#include "caffe/util/performance.hpp" - -using namespace mkldnn; - -namespace caffe { - -// ===== MKLDNNBatchNormLayer ======================================= -template -class MKLDNNBatchNormLayer : public MKLDNNLayer, public Layer { -public: - explicit MKLDNNBatchNormLayer(const LayerParameter& param) - : Layer(param) - , fwd_top_data(), fwd_bottom_data() - , bwd_top_diff(), bwd_bottom_diff() - , BatchNormFwd_pd(), BatchNormBwd_pd() - , mean_memory(), variance_memory() - , scaleshift_memory(), bwd_scaleshift_diff_memory() - , output_memory(), bwd_bottom_diff_memory() - , input_primitive(), bwd_top_diff_primitive() - { - PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); - } - ~MKLDNNBatchNormLayer() {} - -protected: - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual void Reshape(const vector*>& bottom, const vector*>& top); - virtual inline const char* type() const { return "BatchNorm"; } - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); -private: - void InitBatchNorm(const vector*>& bottom, const vector*>& top); - void InitBatchNormBwd(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom); - shared_ptr > fwd_top_data, fwd_bottom_data; - shared_ptr > bwd_top_diff, bwd_bottom_diff; - shared_ptr BatchNormFwd_pd; - shared_ptr BatchNormBwd_pd; - - MKLDNNPrimitive BatchNormFwd, BatchNormBwd; - shared_ptr mean_memory, variance_memory; - - shared_ptr scaleshift_memory, bwd_scaleshift_diff_memory; - shared_ptr output_memory, bwd_bottom_diff_memory; - - shared_ptr input_primitive, bwd_top_diff_primitive; - - int32_t num_, width_, height_, channels_; - Dtype eps_, moving_average_fraction_; - bool use_weight_bias_, bias_term_, use_global_stats_; - - PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); -}; - -// ===== MKLDNNConvolutionLayer ======================================= -template -class MKLDNNConvolutionLayer : public MKLDNNLayer , public ConvolutionLayer { -public: - explicit MKLDNNConvolutionLayer(const LayerParameter& param); - virtual ~MKLDNNConvolutionLayer() {} -protected: - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - // Customized methods - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - void Reshape(const vector*>& bottom, const vector*>& top); -private: - virtual void compute_output_shape(); - virtual void init_properties(const vector*>& bottom, const vector*>& top); - void InitConvolutionFwd(const vector*>& bottom, const vector*>& top); - void InitConvolutionBwd(const vector*>& top - , const vector& propagate_down - , const vector*>& bottom); - - shared_ptr > fwd_bottom_data, fwd_top_data, fwd_weights_data, fwd_bias_data - , bwdd_weights_data, bwdw_bottom_data; - shared_ptr > bwdd_bottom_diff, bwdd_top_diff - , bwdw_top_diff, bwdw_weights_diff, bwdw_bias_diff; - shared_ptr convFwd_pd; - shared_ptr convBwdData_pd; - shared_ptr convBwdWeights_pd; - MKLDNNPrimitive convFwd, convBwdData, convBwdWeights; - shared_ptr fwd_top_data_memory, bwdd_bottom_diff_memory - , bwdw_weights_diff_memory, bwdw_bias_diff_memory; - shared_ptr fwd_bottom_data_primitive, fwd_weights_data_primitive, fwd_bias_data_primitive - , bwdd_top_diff_primitive, bwdd_weights_data_primitive - , bwdw_top_diff_primitive, bwdw_bottom_data_primitive; - int32_t width_, height_, width_out_, height_out_, kernel_w_, kernel_h_, stride_w_, stride_h_; - int pad_w_, pad_h_; - - PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_weights_); -}; - -// ===== MKLDNNInnerProductLayer ======================================= -template -class MKLDNNInnerProductLayer : public MKLDNNLayer , public InnerProductLayer { -public: - explicit MKLDNNInnerProductLayer(const LayerParameter& param); - virtual ~MKLDNNInnerProductLayer(); -protected: - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - // Customized methods - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - void Reshape(const vector*>& bottom, const vector*>& top); -private: - void InitInnerProductFwd(const vector*>& bottom, const vector*>& top); - void InitInnerProductBwd(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - - shared_ptr > fwd_bottom_data, fwd_top_data, fwd_weights_data, fwd_bias_data - , bwdd_weights_data, bwdw_bottom_data; - shared_ptr > bwdd_bottom_diff, bwdd_top_diff - , bwdw_top_diff, bwdw_weights_diff, bwdw_bias_diff; - shared_ptr ipFwd_pd; - shared_ptr ipBwdData_pd; - shared_ptr ipBwdWeights_pd; - - MKLDNNPrimitive ipFwd, ipBwdData, ipBwdWeights; - shared_ptr fwd_top_data_memory, bwdd_bottom_diff_memory - , bwdw_weights_diff_memory, bwdw_bias_diff_memory; - shared_ptr fwd_bottom_data_primitive, fwd_weights_data_primitive, fwd_bias_data_primitive - , bwdd_top_diff_primitive, bwdd_weights_data_primitive - , bwdw_top_diff_primitive, bwdw_bottom_data_primitive; - int32_t w_, h_; - - PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_weights_); -}; - - -/** - * @brief Normalize the input in a local region across feature maps. - */ - -// ===== MKLDNNLRNLayer ======================================= -template -class MKLDNNLRNLayer : public MKLDNNLayer , public Layer { -public: - explicit MKLDNNLRNLayer(const LayerParameter& param); - virtual ~MKLDNNLRNLayer() {} -protected: - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual void Reshape(const vector*>& bottom, const vector*>& top); - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - - virtual inline const char* type() const { return "LRN"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } -private: - void InitLRNFwd(const vector*>& bottom, const vector*>& top); - void InitLRNBwd(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - - shared_ptr > fwd_top_data, fwd_bottom_data; - shared_ptr > bwd_top_diff, bwd_bottom_diff; - shared_ptr lrnFwd_pd; - shared_ptr lrnBwd_pd; - MKLDNNPrimitive lrnFwd; - MKLDNNPrimitive lrnBwd; - shared_ptr fwd_top_data_memory, bwd_bottom_diff_memory, scratch_memory; - shared_ptr fwd_bottom_data_primitive, bwd_top_diff_primitive; - Dtype alpha_, beta_, k_; - int size_, num_, width_, height_, channels_; - - PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); -}; - -// ===== MKLDNNPoolingLayer ======================================= -template -class MKLDNNPoolingLayer : public MKLDNNLayer, public Layer { -public: - explicit MKLDNNPoolingLayer(const LayerParameter& param) - : MKLDNNLayer(), Layer(param) - , fwd_bottom_data(), fwd_top_data() - , bwd_top_diff(), bwd_bottom_diff() - , poolingFwd_pd() - , poolingBwd_pd() - , indices_pd() - , indices_memory(), fwd_top_data_memory(), bwd_bottom_diff_memory() - , fwd_bottom_data_primitive(), bwd_top_diff_primitive() - , num_(0), channels_(0), width_(0), height_(0), width_out_(0), height_out_(0) - , kernel_w_(0), kernel_h_(0), stride_w_(0), stride_h_(0) - , pad_t_(0),pad_b_(0), pad_l_(0), pad_r_(0) - , global_pooling_(false) - { - PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); - } - ~MKLDNNPoolingLayer() {} -protected: - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual void Reshape(const vector*>& bottom, const vector*>& top); - - virtual inline const char* type() const { return "Pooling"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - // MAX POOL layers can output an extra top blob for the mask; - // others can only output the pooled inputs. - virtual inline int MaxTopBlobs() const { - return (this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) ? 2 : 1; - } -protected: - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top,const vector& propagate_down - ,const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - ,const vector*>& bottom); - -private: - void InitPoolingFwd(const vector*>& bottom, const vector*>& top); - void InitPoolingBwd(const vector*>& bottom - , const vector& propagate_down - , const vector*>& top); - - shared_ptr> fwd_bottom_data, fwd_top_data; - shared_ptr> bwd_top_diff, bwd_bottom_diff; - shared_ptr poolingFwd_pd; - shared_ptr poolingBwd_pd; - MKLDNNPrimitive poolingFwd, poolingBwd; - shared_ptr indices_pd; - shared_ptr indices_memory, fwd_top_data_memory, bwd_bottom_diff_memory; - shared_ptr fwd_bottom_data_primitive, bwd_top_diff_primitive; - int32_t num_, channels_, width_, height_, width_out_, height_out_; - int32_t kernel_w_, kernel_h_, stride_w_, stride_h_; - int32_t pad_t_, pad_b_, pad_l_, pad_r_; - Blob max_idx_; - bool global_pooling_; - - PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); -}; - -// ===== MKLDNNReLULayer ======================================= -template -class MKLDNNReLULayer : public MKLDNNLayer , public NeuronLayer { -public: - /** - * @param param provides ReLUParameter relu_param, - * with ReLULayer options: - * - negative_slope (\b optional, default 0). - * the value @f$ \nu @f$ by which negative values are multiplied. - */ - explicit MKLDNNReLULayer(const LayerParameter& param) - : MKLDNNLayer(), NeuronLayer(param) - , fwd_top_data(), fwd_bottom_data() - , bwd_top_diff(), bwd_bottom_diff() - , reluFwd_pd(), reluBwd_pd() - , fwd_top_data_memory(), bwd_bottom_diff_memory() - , fwd_bottom_data_primitive(), bwd_top_diff_primitive() - , num_(0), width_(0), height_(0), channels_(0) - { - PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); - } - ~MKLDNNReLULayer() {} - -protected: - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual void Reshape(const vector*>& bottom, const vector*>& top); - virtual inline const char* type() const { return "ReLU"; } - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); -private: - void InitReLUFwd(const vector*>& bottom, const vector*>& top); - void InitReLUBwd(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - - shared_ptr > fwd_top_data, fwd_bottom_data; - shared_ptr > bwd_top_diff, bwd_bottom_diff; - shared_ptr reluFwd_pd; - shared_ptr reluBwd_pd; - MKLDNNPrimitive reluFwd, reluBwd; - shared_ptr fwd_top_data_memory, bwd_bottom_diff_memory; - shared_ptr fwd_bottom_data_primitive, bwd_top_diff_primitive; - int32_t num_, width_, height_, channels_; - - PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); -}; - -// ===== MKLDNNConcatLayer ====================================== -template -class MKLDNNConcatLayer : public MKLDNNLayer , public Layer { -public: - explicit MKLDNNConcatLayer(const LayerParameter& param) - : MKLDNNLayer(), Layer(param), - concatFwd_pd(), fwd_output_memory(), - bwd_reorder_input_memory(), bwd_reorder_output_memory(), - fwd_top_data(), fwd_bottom_data(), split_channels() { - PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); - } -protected: - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual void Reshape(const vector*>& bottom, const vector*>& top); - virtual inline const char* type() const { return "Concat"; } - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); -private: - void InitConcatFwd(const vector*>& bottom, const vector*>& top); - void InitConcatBwd(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - - shared_ptr concatFwd_pd; - shared_ptr fwd_output_memory; - shared_ptr bwd_reorder_input_memory; - vector> bwd_reorder_output_memory; - vector> bwd_bottom_memory_; - vector> fwd_input_primitives_; - vector fwd_input_primitives_at_; - MKLDNNPrimitive concatFwd; - shared_ptr > fwd_top_data; - vector > > fwd_bottom_data; - shared_ptr > bwd_top_diff; - vector > > bwd_bottom_diff; - vector > reorders; - vector split_channels; - - int32_t num_, width_, height_, channels_, num_concats_; - int concat_dimension; - - PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); -}; - -// ===== MKLDNNSplitLayer ====================================== -template -class MKLDNNSplitLayer : public MKLDNNLayer , public Layer { -public: - explicit MKLDNNSplitLayer(const LayerParameter& param) - : MKLDNNLayer(), Layer(param), - splitBwd_pd_(), bwd_bottom_diff_memory_() - { - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); - } - ~MKLDNNSplitLayer(); - -protected: - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual void Reshape(const vector*>& bottom, const vector*>& top); - virtual inline const char* type() const { return "Split"; } - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); -private: - void InitSplitFwd(const vector*>& bottom, const vector*>& top); - void InitSplitBwd(const vector*>& top, const vector*>& bottom); - - private: - vector sizes_src_; - vector strides_src_; - MKLDNNPrimitive splitBwd_; - shared_ptr splitBwd_pd_; - shared_ptr bwd_bottom_diff_memory_; - shared_ptr > bwd_bottom_diff_; - vector> bwd_top_diff_primitives_; - vector bwd_top_diffs_primitives_at_; - vector > > bwd_top_diffs_; - - PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); -}; - -// ===== MKLDNNEltwiseLayer ======================================= -template -class MKLDNNEltwiseLayer : public MKLDNNLayer , public Layer { -public: - explicit MKLDNNEltwiseLayer(const LayerParameter& param) - : MKLDNNLayer(), Layer(param) - , fwd_top_data(), fwd_bottom_data() - , eltwiseFwd_pd() - , fwd_top_data_memory() - , fwd_bottom_data_primitives_() - , num_(0), width_(0), height_(0), channels_(0) - , num_bottoms_(0) - { - PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); - } - ~MKLDNNEltwiseLayer() {} - -protected: - virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual void Reshape(const vector*>& bottom, const vector*>& top); - virtual inline const char* type() const { return "Eltwise"; } - virtual inline int MinBottomBlobs() const { return 2; } - virtual inline int ExactNumTopBlobs() const { return 1; } - virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void Backward_cpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); -private: - void InitEltwiseFwd(const vector*>& bottom, const vector*>& top); - void InitEltwiseBwd(const vector*>& top, const vector& propagate_down - , const vector*>& bottom); - - shared_ptr > fwd_top_data; - vector > > fwd_bottom_data; - shared_ptr eltwiseFwd_pd; - MKLDNNPrimitive eltwiseFwd; - - shared_ptr fwd_top_data_memory; - vector> fwd_bottom_data_primitives_; - vector fwd_bottom_data_primitives_at_; - - EltwiseParameter_EltwiseOp op_; - vector coeffs_; - Blob max_idx_; - int32_t num_, width_, height_, channels_; - int32_t num_bottoms_; - bool stable_prod_grad_; - - PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); -}; - - -} // namespace caffe -#endif // #ifndef CAFFE_MKLDNN_LAYERS_HPP_ +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef CAFFE_MKLDNN_LAYERS_HPP_ +#define CAFFE_MKLDNN_LAYERS_HPP_ + +#include +#include + +#include "boost/enable_shared_from_this.hpp" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/engine_parser.hpp" +#include "caffe/layers/base_conv_layer.hpp" +#include "caffe/layers/conv_layer.hpp" +#include "caffe/layers/inner_product_layer.hpp" +#include "caffe/layers/neuron_layer.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/mkldnn_memory.hpp" +#include "mkldnn.hpp" + +#include "caffe/util/performance.hpp" + +using namespace mkldnn; + +namespace caffe { + +// ===== MKLDNNBatchNormLayer ======================================= +template +class MKLDNNBatchNormLayer : public MKLDNNLayer, public Layer { +public: + explicit MKLDNNBatchNormLayer(const LayerParameter& param) + : Layer(param) + , fwd_top_data(), fwd_bottom_data() + , bwd_top_diff(), bwd_bottom_diff() + , BatchNormFwd_pd(), BatchNormBwd_pd() + , mean_memory(), variance_memory() + , scaleshift_memory(), bwd_scaleshift_diff_memory() + , output_memory(), bwd_bottom_diff_memory() + , input_primitive(), bwd_top_diff_primitive() + { + PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); + } + ~MKLDNNBatchNormLayer() {} + +protected: + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + virtual void Reshape(const vector*>& bottom, const vector*>& top); + virtual inline const char* type() const { return "BatchNorm"; } + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); +private: + void InitBatchNorm(const vector*>& bottom, const vector*>& top); + void InitBatchNormBwd(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + shared_ptr > fwd_top_data, fwd_bottom_data; + shared_ptr > bwd_top_diff, bwd_bottom_diff; + shared_ptr BatchNormFwd_pd; + shared_ptr BatchNormBwd_pd; + + MKLDNNPrimitive BatchNormFwd, BatchNormBwd; + shared_ptr mean_memory, variance_memory; + + shared_ptr scaleshift_memory, bwd_scaleshift_diff_memory; + shared_ptr output_memory, bwd_bottom_diff_memory; + + shared_ptr input_primitive, bwd_top_diff_primitive; + + int32_t num_, width_, height_, channels_; + Dtype eps_, moving_average_fraction_; + bool use_weight_bias_, bias_term_, use_global_stats_; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); +}; + +// ===== MKLDNNConvolutionLayer ======================================= +template +class MKLDNNConvolutionLayer : public MKLDNNLayer , public ConvolutionLayer { +public: + explicit MKLDNNConvolutionLayer(const LayerParameter& param); + virtual ~MKLDNNConvolutionLayer() {} + + //For test the parameters of kernel/stride/pad + int GetKernelWidth() { return kernel_w_; } + int GetKernelHeight() { return kernel_h_; } + int GetStrideWidth() { return stride_w_; } + int GetStrideHeight() { return stride_h_; } + int GetPadWidth() { return pad_w_; } + int GetPadHeight() { return pad_h_; } +protected: + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + // Customized methods + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + void Reshape(const vector*>& bottom, const vector*>& top); +private: + virtual void compute_output_shape(); + virtual void init_properties(const vector*>& bottom, const vector*>& top); + void InitConvolutionFwd(const vector*>& bottom, const vector*>& top); + void InitConvolutionBwd(const vector*>& top + , const vector& propagate_down + , const vector*>& bottom); + + shared_ptr > fwd_bottom_data, fwd_top_data, fwd_weights_data, fwd_bias_data + , bwdd_weights_data, bwdw_bottom_data; + shared_ptr > bwdd_bottom_diff, bwdd_top_diff + , bwdw_top_diff, bwdw_weights_diff, bwdw_bias_diff; + shared_ptr convFwd_pd; + shared_ptr convBwdData_pd; + shared_ptr convBwdWeights_pd; + MKLDNNPrimitive convFwd, convBwdData, convBwdWeights; + shared_ptr fwd_top_data_memory, bwdd_bottom_diff_memory + , bwdw_weights_diff_memory, bwdw_bias_diff_memory; + shared_ptr fwd_bottom_data_primitive, fwd_weights_data_primitive, fwd_bias_data_primitive + , bwdd_top_diff_primitive, bwdd_weights_data_primitive + , bwdw_top_diff_primitive, bwdw_bottom_data_primitive; + int32_t width_, height_, width_out_, height_out_, kernel_w_, kernel_h_, stride_w_, stride_h_; + int pad_w_, pad_h_; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_weights_); +}; + +// ===== MKLDNNInnerProductLayer ======================================= +template +class MKLDNNInnerProductLayer : public MKLDNNLayer , public InnerProductLayer { +public: + explicit MKLDNNInnerProductLayer(const LayerParameter& param); + virtual ~MKLDNNInnerProductLayer(); +protected: + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + // Customized methods + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + void Reshape(const vector*>& bottom, const vector*>& top); +private: + void InitInnerProductFwd(const vector*>& bottom, const vector*>& top); + void InitInnerProductBwd(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + + shared_ptr > fwd_bottom_data, fwd_top_data, fwd_weights_data, fwd_bias_data + , bwdd_weights_data, bwdw_bottom_data; + shared_ptr > bwdd_bottom_diff, bwdd_top_diff + , bwdw_top_diff, bwdw_weights_diff, bwdw_bias_diff; + shared_ptr ipFwd_pd; + shared_ptr ipBwdData_pd; + shared_ptr ipBwdWeights_pd; + + MKLDNNPrimitive ipFwd, ipBwdData, ipBwdWeights; + shared_ptr fwd_top_data_memory, bwdd_bottom_diff_memory + , bwdw_weights_diff_memory, bwdw_bias_diff_memory; + shared_ptr fwd_bottom_data_primitive, fwd_weights_data_primitive, fwd_bias_data_primitive + , bwdd_top_diff_primitive, bwdd_weights_data_primitive + , bwdw_top_diff_primitive, bwdw_bottom_data_primitive; + int32_t w_, h_; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_weights_); +}; + + +/** + * @brief Normalize the input in a local region across feature maps. + */ + +// ===== MKLDNNLRNLayer ======================================= +template +class MKLDNNLRNLayer : public MKLDNNLayer , public Layer { +public: + explicit MKLDNNLRNLayer(const LayerParameter& param); + virtual ~MKLDNNLRNLayer() {} +protected: + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + virtual void Reshape(const vector*>& bottom, const vector*>& top); + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + + virtual inline const char* type() const { return "LRN"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } +private: + void InitLRNFwd(const vector*>& bottom, const vector*>& top); + void InitLRNBwd(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + + shared_ptr > fwd_top_data, fwd_bottom_data; + shared_ptr > bwd_top_diff, bwd_bottom_diff; + shared_ptr lrnFwd_pd; + shared_ptr lrnBwd_pd; + MKLDNNPrimitive lrnFwd; + MKLDNNPrimitive lrnBwd; + shared_ptr bottom_md; + shared_ptr fwd_top_data_memory, bwd_bottom_diff_memory, scratch_memory; + shared_ptr fwd_bottom_data_primitive, bwd_top_diff_primitive; + Dtype alpha_, beta_, k_; + int size_, num_, width_, height_, channels_; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); +}; + +// ===== MKLDNNPoolingLayer ======================================= +template +class MKLDNNPoolingLayer : public MKLDNNLayer, public Layer { +public: + explicit MKLDNNPoolingLayer(const LayerParameter& param) + : MKLDNNLayer(), Layer(param) + , fwd_bottom_data(), fwd_top_data() + , bwd_top_diff(), bwd_bottom_diff() + , poolingFwd_pd() + , poolingBwd_pd() + , indices_pd() + , indices_memory(), fwd_top_data_memory(), bwd_bottom_diff_memory() + , fwd_bottom_data_primitive(), bwd_top_diff_primitive() + , num_(0), channels_(0), width_(0), height_(0), width_out_(0), height_out_(0) + , kernel_w_(0), kernel_h_(0), stride_w_(0), stride_h_(0) + , pad_t_(0),pad_b_(0), pad_l_(0), pad_r_(0) + , global_pooling_(false) + { + PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); + } + ~MKLDNNPoolingLayer() {} +protected: + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + virtual void Reshape(const vector*>& bottom, const vector*>& top); + + virtual inline const char* type() const { return "Pooling"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 1; } + // MAX POOL layers can output an extra top blob for the mask; + // others can only output the pooled inputs. + virtual inline int MaxTopBlobs() const { + return (this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) ? 2 : 1; + } +protected: + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top,const vector& propagate_down + ,const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + ,const vector*>& bottom); + +private: + void InitPoolingFwd(const vector*>& bottom, const vector*>& top); + void InitPoolingBwd(const vector*>& bottom + , const vector& propagate_down + , const vector*>& top); + + shared_ptr> fwd_bottom_data, fwd_top_data; + shared_ptr> bwd_top_diff, bwd_bottom_diff; + shared_ptr poolingFwd_pd; + shared_ptr poolingBwd_pd; + MKLDNNPrimitive poolingFwd, poolingBwd; + shared_ptr indices_pd; + shared_ptr indices_memory, fwd_top_data_memory, bwd_bottom_diff_memory; + shared_ptr fwd_bottom_data_primitive, bwd_top_diff_primitive; + int32_t num_, channels_, width_, height_, width_out_, height_out_; + int32_t kernel_w_, kernel_h_, stride_w_, stride_h_; + int32_t pad_t_, pad_b_, pad_l_, pad_r_; + Blob max_idx_; + bool global_pooling_; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); +}; + +// ===== MKLDNNReLULayer ======================================= +template +class MKLDNNReLULayer : public MKLDNNLayer , public NeuronLayer { +public: + /** + * @param param provides ReLUParameter relu_param, + * with ReLULayer options: + * - negative_slope (\b optional, default 0). + * the value @f$ \nu @f$ by which negative values are multiplied. + */ + explicit MKLDNNReLULayer(const LayerParameter& param) + : MKLDNNLayer(), NeuronLayer(param) + , fwd_top_data(), fwd_bottom_data() + , bwd_top_diff(), bwd_bottom_diff() + , reluFwd_pd(), reluBwd_pd() + , fwd_top_data_memory(), bwd_bottom_diff_memory() + , fwd_bottom_data_primitive(), bwd_top_diff_primitive() + , num_(0), width_(0), height_(0), channels_(0) + { + PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); + } + ~MKLDNNReLULayer() {} + +protected: + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + virtual void Reshape(const vector*>& bottom, const vector*>& top); + virtual inline const char* type() const { return "ReLU"; } + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); +private: + void InitReLUFwd(const vector*>& bottom, const vector*>& top); + void InitReLUBwd(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + + shared_ptr > fwd_top_data, fwd_bottom_data; + shared_ptr > bwd_top_diff, bwd_bottom_diff; + shared_ptr reluFwd_pd; + shared_ptr reluBwd_pd; + MKLDNNPrimitive reluFwd, reluBwd; + shared_ptr fwd_top_data_memory, bwd_bottom_diff_memory; + shared_ptr fwd_bottom_data_primitive, bwd_top_diff_primitive; + int32_t num_, width_, height_, channels_; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); +}; + +// ===== MKLDNNConcatLayer ====================================== +template +class MKLDNNConcatLayer : public MKLDNNLayer , public Layer { +public: + explicit MKLDNNConcatLayer(const LayerParameter& param) + : MKLDNNLayer(), Layer(param), + concatFwd_pd(), fwd_output_memory(), + bwd_reorder_input_memory(), bwd_reorder_output_memory(), + fwd_top_data(), fwd_bottom_data(), split_channels() { + PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); + } +protected: + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + virtual void Reshape(const vector*>& bottom, const vector*>& top); + virtual inline const char* type() const { return "Concat"; } + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); +private: + void InitConcatFwd(const vector*>& bottom, const vector*>& top); + void InitConcatBwd(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + + shared_ptr concatFwd_pd; + shared_ptr fwd_output_memory; + shared_ptr bwd_reorder_input_memory; + vector> bwd_reorder_output_memory; + vector> bwd_bottom_memory_; + vector> fwd_input_primitives_; + vector fwd_input_primitives_at_; + MKLDNNPrimitive concatFwd; + shared_ptr > fwd_top_data; + vector > > fwd_bottom_data; + shared_ptr > bwd_top_diff; + vector > > bwd_bottom_diff; + vector > reorders; + vector split_channels; + + int32_t num_, width_, height_, channels_, num_concats_; + int concat_dimension; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); +}; + +// ===== MKLDNNSplitLayer ====================================== +template +class MKLDNNSplitLayer : public MKLDNNLayer , public Layer { +public: + explicit MKLDNNSplitLayer(const LayerParameter& param) + : MKLDNNLayer(), Layer(param), + splitBwd_pd_(), bwd_bottom_diff_memory_() + { + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); + } + ~MKLDNNSplitLayer(); + +protected: + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + virtual void Reshape(const vector*>& bottom, const vector*>& top); + virtual inline const char* type() const { return "Split"; } + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); +private: + void InitSplitFwd(const vector*>& bottom, const vector*>& top); + void InitSplitBwd(const vector*>& top, const vector*>& bottom); + + private: + vector sizes_src_; + vector strides_src_; + MKLDNNPrimitive splitBwd_; + shared_ptr splitBwd_pd_; + shared_ptr bwd_bottom_diff_memory_; + shared_ptr > bwd_bottom_diff_; + vector> bwd_top_diff_primitives_; + vector bwd_top_diffs_primitives_at_; + vector > > bwd_top_diffs_; + + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); +}; + +// ===== MKLDNNEltwiseLayer ======================================= +template +class MKLDNNEltwiseLayer : public MKLDNNLayer , public Layer { +public: + explicit MKLDNNEltwiseLayer(const LayerParameter& param) + : MKLDNNLayer(), Layer(param) + , fwd_top_data(), fwd_bottom_data() + , eltwiseFwd_pd() + , fwd_top_data_memory() + , fwd_bottom_data_primitives_() + , num_(0), width_(0), height_(0), channels_(0) + , num_bottoms_(0) + { + PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); + } + ~MKLDNNEltwiseLayer() {} + +protected: + virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + virtual void Reshape(const vector*>& bottom, const vector*>& top); + virtual inline const char* type() const { return "Eltwise"; } + virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int ExactNumTopBlobs() const { return 1; } + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); + virtual void Backward_cpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); +private: + void InitEltwiseFwd(const vector*>& bottom, const vector*>& top); + void InitEltwiseBwd(const vector*>& top, const vector& propagate_down + , const vector*>& bottom); + + shared_ptr > fwd_top_data; + vector > > fwd_bottom_data; + shared_ptr eltwiseFwd_pd; + MKLDNNPrimitive eltwiseFwd; + + shared_ptr fwd_top_data_memory; + vector> fwd_bottom_data_primitives_; + vector fwd_bottom_data_primitives_at_; + + EltwiseParameter_EltwiseOp op_; + vector coeffs_; + Blob max_idx_; + int32_t num_, width_, height_, channels_; + int32_t num_bottoms_; + bool stable_prod_grad_; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); +}; + + +} // namespace caffe +#endif // #ifndef CAFFE_MKLDNN_LAYERS_HPP_ diff --git a/include/caffe/layers/softmax_loss_layer.hpp b/include/caffe/layers/softmax_loss_layer.hpp index de5fc3dc7..741d31f1d 100644 --- a/include/caffe/layers/softmax_loss_layer.hpp +++ b/include/caffe/layers/softmax_loss_layer.hpp @@ -100,6 +100,9 @@ class SoftmaxWithLossLayer : public LossLayer { virtual inline int MinTopBlobs() const { return 1; } virtual inline int MaxTopBlobs() const { return 2; } + virtual inline int ExactNumBottomBlobs() const { return -1; } + virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int MaxBottomBlobs() const { return 3; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); diff --git a/include/caffe/mkldnn_base.hpp b/include/caffe/mkldnn_base.hpp index 1cc8923ac..f68d590d4 100644 --- a/include/caffe/mkldnn_base.hpp +++ b/include/caffe/mkldnn_base.hpp @@ -203,6 +203,10 @@ template class MKLDNNPrimitive { public: explicit MKLDNNPrimitive():aprimitive(), mkldnn_stream() {} + + //API for initializing with shared_ptr + MKLDNNPrimitive(shared_ptr aprimitive_input) {this->aprimitive = aprimitive_input;} + virtual ~MKLDNNPrimitive() {} void reset(primitive* pprimitive) { this->aprimitive.reset(pprimitive);} shared_ptr aprimitive; diff --git a/include/caffe/util/compareToolUtilities.h b/include/caffe/util/compareToolUtilities.h index ab1ee877d..754890b0d 100644 --- a/include/caffe/util/compareToolUtilities.h +++ b/include/caffe/util/compareToolUtilities.h @@ -372,13 +372,16 @@ int collectAndCheckLayerData(bool collect_step, } if (bottom_need_backward[i].size() > 0 && bottom_need_backward[i][0]) { - getFileName(file_name, false, "FwrdBtmDat", i); - checkData(file_name, bottom_vecs[i][0]->cpu_data(), - layers[i]->type(), output_dir, - &erronous_layers); - checkAllNans(bottom_vecs[i][0]->cpu_diff(), - bottom_vecs[i][0]->count(), "bottom.diff", - layers[i]->type(), &erronous_layers); + // We check data only for out-of-place computations + if (bottom_vecs[i][0] != top_vecs[i][0]) { + getFileName(file_name, false, "FwrdBtmDat", i); + checkData(file_name, bottom_vecs[i][0]->cpu_data(), + layers[i]->type(), output_dir, + &erronous_layers); + } + checkAllNans(bottom_vecs[i][0]->cpu_diff(), + bottom_vecs[i][0]->count(), "bottom.diff", + layers[i]->type(), &erronous_layers); } checkAllNans(top_vecs[i][0]->cpu_diff(), diff --git a/include/caffe/util/im2col.hpp b/include/caffe/util/im2col.hpp old mode 100644 new mode 100755 index 8c3a2566c..1f2b567e3 --- a/include/caffe/util/im2col.hpp +++ b/include/caffe/util/im2col.hpp @@ -53,6 +53,13 @@ void im2col_cpu(const Dtype* data_im, const int channels, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_col); +template +void im3d2col_cpu(const Dtype* data_im, const int channels, + const int depth, const int height, const int width, const int kernel_d, const int kernel_h, const int kernel_w, + const int pad_d, const int pad_h, const int pad_w, const int stride_d, const int stride_h, + const int stride_w, const int dilation_d, const int dilation_h, const int dilation_w, + Dtype* data_col); + template void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, const int* im_shape, const int* col_shape, @@ -66,6 +73,13 @@ void col2im_cpu(const Dtype* data_col, const int channels, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_im); +template +void col2im3d_cpu(const Dtype* data_col, const int channels, + const int depth, const int height, const int width, const int kernel_d, const int kernel_h, const int kernel_w, + const int pad_d, const int pad_h, const int pad_w, const int stride_d, const int stride_h, + const int stride_w, const int dilation_d, const int dilation_h, const int dilation_w, + Dtype* data_im); + template void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, const int col_size, const int* im_shape, const int* col_shape, diff --git a/models/intel_optimized_models/resnet_50/solver.prototxt b/models/intel_optimized_models/resnet_50/solver.prototxt index a18d6e572..4574a306f 100644 --- a/models/intel_optimized_models/resnet_50/solver.prototxt +++ b/models/intel_optimized_models/resnet_50/solver.prototxt @@ -1,5 +1,5 @@ #This solver is described by Computer Vision Group Jena (CVGJ) in [ImageNet pre-trained models with batch normalization] (https://arxiv.org/pdf/1612.01452.pdf) -net: "train_val.prototxt" +net: "models/intel_optimized_models/resnet_50/train_val.prototxt" test_iter: 5000 test_interval: 15000 base_lr: 0.1 @@ -11,6 +11,7 @@ power: 1 momentum: 0.9 weight_decay: 0.0001 snapshot: 30000 -snapshot_prefix: "caffe-resnet50" +snapshot_prefix: "models/intel_optimized_models/resnet_50/caffe-resnet50" test_initialization: false solver_mode: CPU + diff --git a/models/intel_optimized_models/resnet_50/train_val.prototxt b/models/intel_optimized_models/resnet_50/train_val.prototxt index 09cd2b99e..6aadf5ca5 100644 --- a/models/intel_optimized_models/resnet_50/train_val.prototxt +++ b/models/intel_optimized_models/resnet_50/train_val.prototxt @@ -22,7 +22,7 @@ transform_param { mean_value: 123 } data_param { - source: "/data/compressed_lmdb/ilsvrc12_train_lmdb" + source: "examples/imagenet/ilsvrc12_train_lmdb" batch_size: 128 backend: LMDB shuffle: true @@ -46,7 +46,7 @@ transform_param { mean_value: 123 } data_param { - source: "/data/compressed_lmdb/ilsvrc12_val_lmdb/" + source: "examples/imagenet/ilsvrc12_val_lmdb/" batch_size: 10 backend: LMDB } diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index e84fe9553..8a169864b 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -132,7 +132,7 @@ void ConcatLayer::Backward_cpu(const vector*>& top, if (propagate_down[i]) { Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); #ifdef _OPENMP - #pragma omp parallel for + #pragma omp parallel for if(num_concats_ > 1) #endif for (int n = 0; n < num_concats_; ++n) { caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index 36bba5e90..82efc2410 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -73,7 +73,9 @@ void ConvolutionLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->cpu_data(); Dtype* top_data = top[i]->mutable_cpu_data(); #ifdef _OPENMP - #pragma omp parallel for num_threads(this->num_of_threads_) + #pragma omp parallel if(this->num_of_threads_ > 1) num_threads(this->num_of_threads_) + { + #pragma omp for #endif for (int n = 0; n < this->num_; ++n) { this->forward_cpu_gemm(bottom_data + n*this->bottom_dim_, @@ -84,6 +86,9 @@ void ConvolutionLayer::Forward_cpu(const vector*>& bottom, this->forward_cpu_bias(top_data + n * this->top_dim_, bias); } } +#ifdef _OPENMP + } +#endif } } @@ -111,8 +116,10 @@ void ConvolutionLayer::Backward_cpu(const vector*>& top, if (this->param_propagate_down_[0]) { #ifdef _OPENMP - this->clear_weight_mt(); - #pragma omp parallel num_threads(this->num_of_threads_) + if (this->num_of_threads_ > 1) { + this->clear_weight_mt(); + } + #pragma omp parallel if(this->num_of_threads_ > 1) num_threads(this->num_of_threads_) #endif { #ifdef _OPENMP @@ -125,20 +132,27 @@ void ConvolutionLayer::Backward_cpu(const vector*>& top, } #ifdef _OPENMP - this->sum_weight_mt(weight_diff); + if (this->num_of_threads_ > 1) { + this->sum_weight_mt(weight_diff); + } #endif } } if (propagate_down[i]) { #ifdef _OPENMP - #pragma omp parallel for num_threads(this->num_of_threads_) + #pragma omp parallel if(this->num_of_threads_ > 1) num_threads(this->num_of_threads_) + { + #pragma omp for #endif for (int n = 0; n < this->num_; ++n) { // gradient w.r.t. bottom data, if necessary. this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight, bottom_diff + n * this->bottom_dim_); } +#ifdef _OPENMP + } +#endif } } } diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index c23c583de..a2bf24333 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -69,6 +69,15 @@ void DropoutLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); + // below line designated to set correspondent SyncedMemory->_head to HEAD_AT_CPU + // Fix the issue of "Check failed: this->_cpu_ptr == cpu_ptr (0 vs. 0x5587dfc87ec0)" (GoogleNet V1) + // The reason is after pooling layer: MKLDNNPoolingLayer::Forward_cpu: pool5/7x7_s1, the top[0]->prv_data() has value + // It will convert to cpu data in the dropout layer, and set the _head to HEAD_AT_CPU after executing top[0]->mutable_cpu_data() + // Howerver, I found top[0]->cpu_data() and top[0]->prv_data() both has value + // So in the inner product layer: loss3/classifier, the data will convert from bottom prv data + // and the reorder will change from this->_reorder_usr2prv to this->_reorder_extprv2prv_pd + // So eventually trigger the assertion. + top[0]->set_prv_data_descriptor(NULL); unsigned int* mask = rand_vec_.mutable_cpu_data(); const int count = bottom[0]->count(); if (this->phase_ == TRAIN) { diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 725dfcc0f..6124bb494 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -98,10 +98,10 @@ void HDF5DataLayer::LoadHDF5FileData(const char* filename) { // Shuffle if needed. if (this->layer_param_.hdf5_data_param().shuffle()) { std::random_shuffle(data_permutation_.begin(), data_permutation_.end()); - DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) + DLOG(INFO) << "Successfully loaded " << hdf_blobs_[0]->shape(0) << " rows (shuffled)"; } else { - DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) << " rows"; + DLOG(INFO) << "Successfully loaded " << hdf_blobs_[0]->shape(0) << " rows"; } } diff --git a/src/caffe/layers/mkl_batch_norm_layer.cpp b/src/caffe/layers/mkl_batch_norm_layer.cpp old mode 100644 new mode 100755 index 79a3ceb3d..b2e86830f --- a/src/caffe/layers/mkl_batch_norm_layer.cpp +++ b/src/caffe/layers/mkl_batch_norm_layer.cpp @@ -66,6 +66,7 @@ void MKLBatchNormLayer::Init(const vector*>& bottom, eps_ = this->layer_param_.batch_norm_param().eps(); use_weight_bias_ = this->layer_param_.batch_norm_param().use_weight_bias(); bias_term_ = this->layer_param_.batch_norm_param().bias_term(); + use_global_stats_ = this->layer_param_.batch_norm_param().use_global_stats(); CHECK(use_weight_bias_) << "BatchNorm without scaling have not supported yet"; @@ -111,6 +112,7 @@ void MKLBatchNormLayer::Init(const vector*>& bottom, dnnReleaseBuffer(variance_buffer_); dnnReleaseBuffer(scaleShift_buffer_); dnnReleaseBuffer(diffScaleShift_buffer_); + // "Lazy" allocation because here we don't know // what layout is used by neighbours. @@ -271,9 +273,15 @@ void MKLBatchNormLayer::Forward_cpu( bwd_top_diff ->create_internal_layout(batchNormFwd, dnnResourceDst); bwd_bottom_diff->create_internal_layout(batchNormFwd, dnnResourceSrc); - e = dnnBatchNormalizationCreateBackward( - &batchNormBwd, NULL, mem_descr->layout_int, eps_, dnnUseScaleShift); - CHECK_EQ(e, E_SUCCESS); + if (!use_global_stats_) { + e = dnnBatchNormalizationCreateBackward( + &batchNormBwd, NULL, mem_descr->layout_int, eps_, dnnUseScaleShift); + CHECK_EQ(e, E_SUCCESS); + } else { + e = dnnBatchNormalizationCreateBackward( + &batchNormBwd, NULL, mem_descr->layout_int, eps_, dnnUseScaleShift | dnnUseInputMeanVariance); + CHECK_EQ(e, E_SUCCESS); + } } } else { DLOG(INFO) << "Using cpu_data in MKLBatchNormLayer."; @@ -290,9 +298,15 @@ void MKLBatchNormLayer::Forward_cpu( dnnUseScaleShift | dnnUseInputMeanVariance); CHECK_EQ(e, E_SUCCESS); - e = dnnBatchNormalizationCreateBackward( - &batchNormBwd, NULL, layout_usr_, eps_, dnnUseScaleShift); - CHECK_EQ(e, E_SUCCESS); + if (!use_global_stats_) { + e = dnnBatchNormalizationCreateBackward( + &batchNormBwd, NULL, layout_usr_, eps_, dnnUseScaleShift); + CHECK_EQ(e, E_SUCCESS); + } else { + e = dnnBatchNormalizationCreateBackward( + &batchNormBwd, NULL, layout_usr_, eps_, dnnUseScaleShift | dnnUseInputMeanVariance); + CHECK_EQ(e, E_SUCCESS); + } } bottom_data = reinterpret_cast(const_cast(bottom[0]->cpu_data())); @@ -360,13 +374,13 @@ void MKLBatchNormLayer::Forward_cpu( // doing Backward // TODO: make a caffe_coppy working on blobs caffe_copy(amount_to_copy, static_cast(bottom_data), - temp_.mutable_cpu_data()); + temp_.mutable_cpu_data()); } if (use_global_stats_) { // use the stored mean/variance estimates. const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ? - 0 : 1 / this->blobs_[2]->cpu_data()[0]; + 0 : 1 / this->blobs_[2]->cpu_data()[0]; caffe_cpu_scale(this->blobs_[0]->count(), scale_factor, this->blobs_[0]->cpu_data(), mean_buffer_); caffe_cpu_scale(this->blobs_[1]->count(), scale_factor, diff --git a/src/caffe/layers/mkldnn_batch_norm_layer.cpp b/src/caffe/layers/mkldnn_batch_norm_layer.cpp index 90939c34b..b479f8828 100644 --- a/src/caffe/layers/mkldnn_batch_norm_layer.cpp +++ b/src/caffe/layers/mkldnn_batch_norm_layer.cpp @@ -130,7 +130,12 @@ void MKLDNNBatchNormLayer::Reshape(const vector*>& bottom this->num_ = bottom[0]->num(); this->channels_ = bottom[0]->channels(); - top[0]->Reshape(this->num_, this->channels_, this->height_, this->width_); + //Fix: should reshape the top blob with the real size of bottom blob + //top[0]->Reshape(this->num_, this->channels_, this->height_, this->width_); +#ifdef DEBUG + LOG(INFO) << "size of bottom blob: " << bottom[0]->shape().size(); +#endif + top[0]->ReshapeLike(*bottom[0]); } template @@ -146,12 +151,13 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott int32_t n = this->num_; int32_t iw = this->width_; int32_t ih = this->height_; - int32_t ic = this->channels_; + int32_t ic = this->channels_; bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); engine cpu_engine = CpuEngine::Instance().get_engine(); memory::data_type mpcsn = memory::data_type::f32; + // ---- Initialize memory descriptors ------------- shared_ptr input_md, output_md, scaleshift_md; shared_ptr usr_mpd, prv_mpd, scaleshift_mpd; @@ -162,7 +168,7 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott usr_mpd = mem_descr->usr_memory_pd(); prv_mpd = mem_descr->prv_memory_pd(); } else { - input_md.reset(new memory::desc({{n, ic, ih, iw}}, mpcsn, memory::format::nchw)); + input_md.reset(new memory::desc({{n, ic, ih, iw}}, mpcsn, memory::format::nchw)); //MKLDNN batch norm only support 4D memory descriptor! usr_mpd.reset(new memory::primitive_desc(*input_md, cpu_engine)); } output_md = input_md; @@ -242,6 +248,23 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott fwd_bottom_data->set_mkldnn_primitive(BatchNormFwd); fwd_top_data->set_mkldnn_primitive(BatchNormFwd); + + //Fix: MKLDNN batch norm only support 4D memory descriptor! Use 4D for calculation and reshape to 2D for output! + bool has_spatial = (bottom[0]->shape().size() != 2); +#ifdef DEBUG + LOG(INFO) << "has_spatial flag value: " << has_spatial; +#endif + if (has_spatial == false) + { +#ifdef DEBUG + LOG(INFO) << "size of bottom blob: " << bottom[0]->shape().size(); + LOG(INFO) << "MKLDNN batch norm only support 4D memory descriptor! Use 4D for calculation and reshape to 2D for output!"; +#endif + vector top_shape; + top_shape.push_back(bottom[0]->num()); + top_shape.push_back(bottom[0]->channels()); + top[0]->Reshape(top_shape); + } } @@ -250,8 +273,11 @@ void MKLDNNBatchNormLayer::Forward_cpu(const vector*>& bottom ,const vector*>& top) { VLOG(1) << "MKLDNNBatchNormLayer::Forward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNBatchNormLayer::Forward_cpu: " << this->layer_param_.name(); +#endif - if( BatchNormFwd_pd == NULL) + if(BatchNormFwd_pd == NULL) InitBatchNorm(bottom, top); // making reorders if needed. fwd_bottom_data->sync_before_read(); @@ -323,8 +349,8 @@ void MKLDNNBatchNormLayer::InitBatchNormBwd( engine cpu_engine = CpuEngine::Instance().get_engine(); memory::data_type mpcsn = memory::data_type::f32; - // ---- Initialize memory descriptors ------------- + // ---- Initialize memory descriptors ------------- shared_ptr top_diff_md, top_data_md; shared_ptr usr_diff_mpd(NULL), prv_diff_mpd(NULL); if (top_diff_is_prv) { @@ -334,7 +360,7 @@ void MKLDNNBatchNormLayer::InitBatchNormBwd( usr_diff_mpd = mem_descr->usr_memory_pd(); prv_diff_mpd = mem_descr->prv_memory_pd(); } else { - top_diff_md.reset(new memory::desc({{n, c, h, w}}, mpcsn, memory::format::nchw)); + top_diff_md.reset(new memory::desc({{n, c, h, w}}, mpcsn, memory::format::nchw)); //MKLDNN batch norm only support 4D memory descriptor! usr_diff_mpd.reset(new memory::primitive_desc(*top_diff_md, cpu_engine)); } @@ -392,10 +418,13 @@ template void MKLDNNBatchNormLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - VLOG(1) << "MKLDNNBatchNormLayer::Backward_cpu: " - << this->layer_param_.name(); + VLOG(1) << "MKLDNNBatchNormLayer::Backward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNBatchNormLayer::Backward_cpu: " << this->layer_param_.name(); +#endif - if (BatchNormBwd_pd == NULL) InitBatchNormBwd(top, propagate_down, bottom); + if (BatchNormBwd_pd == NULL) + InitBatchNormBwd(top, propagate_down, bottom); // making reorders if needed. bwd_top_diff->sync_before_read(); // update bottom that head at prv @@ -403,7 +432,38 @@ void MKLDNNBatchNormLayer::Backward_cpu(const vector*>& top, PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW")); PERFORMANCE_MEASUREMENT_BEGIN(); +#ifdef DEBUG + if (bottom[0]->prv_data() != NULL) + { + LOG(INFO) << "Debug: Bottom prv data: " << *bottom[0]->prv_data(); + } + else + { + LOG(INFO) << "Debug: Bottom prv data is NULL!"; + } + + if (top[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Top prv diff: " << *top[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Top prv diff is NULL!"; + LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); + } +#endif BatchNormBwd.submit(); +#ifdef DEBUG + if (bottom[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Bottom prv diff: " << *bottom[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Bottom prv diff is NULL!"; + LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); + } +#endif PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); /* FIXME: this wouldn't work with lazy stream */ diff --git a/src/caffe/layers/mkldnn_concat_layer.cpp b/src/caffe/layers/mkldnn_concat_layer.cpp index ffdcf8edd..ee2cc5026 100644 --- a/src/caffe/layers/mkldnn_concat_layer.cpp +++ b/src/caffe/layers/mkldnn_concat_layer.cpp @@ -90,6 +90,31 @@ void MKLDNNConcatLayer::InitConcatFwd(const vector*>& bottom, const vector*>& top) { if (std::is_same::value) NOT_IMPLEMENTED; + //Fix: MKLDNN concat layer should use 4D blob as input! Reshape the 2D input blob into 4D for calculation! + bool has_spatial = (bottom[0]->shape().size() != 2); +#ifdef DEBUG + LOG(INFO) << "has_spatial flag value: " << has_spatial; +#endif + if (has_spatial == false) + { +#ifdef DEBUG + LOG(INFO) << "size of bottom blob: " << bottom[0]->shape().size(); + LOG(INFO) << "size of top blob: " << top[0]->shape().size(); + LOG(INFO) << "MKLDNN concat layer only support 4D blob as input! Reshape the 2D input blob into 4D for calculation!"; +#endif + vector bottom_4D_shape; + int bottom_4D_height = 1; + int bottom_4D_width = 1; + bottom_4D_shape.push_back(bottom[0]->num()); + bottom_4D_shape.push_back(bottom[0]->channels()); + bottom_4D_shape.push_back(bottom_4D_height); + bottom_4D_shape.push_back(bottom_4D_width); + for (auto i = 0; i < num_concats_; i++) + { + bottom[i]->Reshape(bottom_4D_shape); + } + } + engine cpu_engine = CpuEngine::Instance().get_engine(); memory::data_type data_type = memory::data_type::f32; // memory::format mfmt_any = memory::format::any; @@ -222,7 +247,10 @@ void MKLDNNConcatLayer::InitConcatBwd(const vector*>& top, template void MKLDNNConcatLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - //VLOG(1) << "MKLDNNConcatLayer::Forward_cpu: " << this->layer_param_.name(); + VLOG(1) << "MKLDNNConcatLayer::Forward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNConcatLayer::Forward_cpu: " << this->layer_param_.name(); +#endif if (NULL == concatFwd_pd) InitConcatFwd(bottom, top); @@ -244,7 +272,11 @@ void MKLDNNConcatLayer::Backward_cpu(const vector*>& top ,const vector& propagate_down ,const vector*>& bottom) { - //VLOG(1) << "MKLDNNConcatLayer::Backward_cpu: " << this->layer_param_.name(); + VLOG(1) << "MKLDNNConcatLayer::Backward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNConcatLayer::Backward_cpu: " << this->layer_param_.name(); +#endif + if (reorders.size() == 0) InitConcatBwd(top, propagate_down, bottom); bwd_top_diff->sync_before_read(); @@ -255,7 +287,6 @@ void MKLDNNConcatLayer::Backward_cpu(const vector*>& top reorders[i].submit(); PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); } - } #ifdef CPU_ONLY diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index 02dbd36c8..fa8e7fc15 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -1,511 +1,574 @@ -/* -All modification made by Intel Corporation: © 2016 Intel Corporation - -All contributions by the University of California: -Copyright (c) 2014, 2015, The Regents of the University of California (Regents) -All rights reserved. - -All other contributions: -Copyright (c) 2014, 2015, the respective contributors -All rights reserved. -For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md - - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef MKLDNN_SUPPORTED -#include -#include -#include - -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/layers/mkldnn_layers.hpp" -//#include "mkl_service.h" - -// TODO: Correct process case if there are no bias -// TODO: Exception handling - mkl-dnn produces exceptions on errors - -namespace caffe { - -template -MKLDNNConvolutionLayer::MKLDNNConvolutionLayer(const LayerParameter& param) - : MKLDNNLayer(), ConvolutionLayer(param) - , fwd_bottom_data(NULL), fwd_top_data(NULL), fwd_weights_data(NULL), fwd_bias_data(NULL) - , bwdd_weights_data(NULL), bwdw_bottom_data(NULL) - , bwdd_bottom_diff(NULL), bwdd_top_diff(NULL) - , bwdw_top_diff(NULL), bwdw_weights_diff(NULL), bwdw_bias_diff(NULL) - , convFwd_pd(NULL), convBwdData_pd(NULL), convBwdWeights_pd(NULL) - , fwd_top_data_memory(NULL), bwdd_bottom_diff_memory(NULL) - , bwdw_weights_diff_memory(NULL), bwdw_bias_diff_memory(NULL) - , fwd_bottom_data_primitive(NULL), fwd_weights_data_primitive(NULL), fwd_bias_data_primitive(NULL) - , bwdd_top_diff_primitive(NULL), bwdd_weights_data_primitive(NULL) - , bwdw_top_diff_primitive(NULL), bwdw_bottom_data_primitive(NULL) - , width_(0), height_(0), width_out_(0), height_out_(0), kernel_w_(0), kernel_h_(0) - , stride_w_(0), stride_h_(0), pad_w_(0), pad_h_(0) -{ - PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_weights_); -} - -template -void MKLDNNConvolutionLayer::compute_output_shape() -{ - ConvolutionLayer::compute_output_shape(); - this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_) - / this->stride_h_ + 1; - this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_) - / this->stride_w_ + 1; -} - -template -void MKLDNNConvolutionLayer::init_properties(const vector*>& bottom - , const vector*>& top) -{ - this->stride_w_ = this->stride_.cpu_data()[0]; - this->stride_h_ = this->stride_.cpu_data()[1]; - this->width_ = bottom[0]->width(); - this->height_ = bottom[0]->height(); - this->pad_w_ = this->pad_.cpu_data()[0]; - this->pad_h_ = this->pad_.cpu_data()[1]; - this->kernel_w_ = this->kernel_shape_.cpu_data()[0]; - this->kernel_h_ = this->kernel_shape_.cpu_data()[1]; - -#ifdef USE_MLSL - if ((this->layerOp == nullptr) && (this->phase_ == TRAIN)) { - mn::OpRegInfo reg_info{ mn::train::get_session(), MLSL::OT_CC }; - reg_info.set_name(this->layer_param_.name()); - reg_info.add_parameter_set(this->channels_ * this->num_output_ / std::max(this->group_, 1), this->kernel_w_ * this->kernel_h_); - if (this->bias_term_) { - reg_info.add_parameter_set(this->num_output_, 1); - } - this->layerOp = mn::train::add_operation(reg_info); - } -#endif /* USE_MLSL */ -} - -template -void MKLDNNConvolutionLayer::LayerSetUp(const vector*>& bottom - , const vector*>& top) -{ - VLOG(1) << "<< MKLDNNConvolutionLayer::LayerSetUp: " << this->layer_param_.name(); - ConvolutionLayer::LayerSetUp(bottom, top); - init_properties(bottom, top); - this->bottom_shape_ = &bottom[0]->shape(); -} - -template -void MKLDNNConvolutionLayer::Reshape(const vector*>& bottom - , const vector*>& top) -{ - VLOG(1) << " MKLDNNConvolutionLayer::Reshape: " << this->layer_param_.name(); - BaseConvolutionLayer::ReshapeForMKL(bottom, top); - init_properties(bottom, top); -} - -template -void MKLDNNConvolutionLayer::InitConvolutionFwd(const vector*>& bottom - , const vector*>& top) -{ - if (std::is_same::value) NOT_IMPLEMENTED; - auto propagation = this->phase_ == TEST ? prop_kind::forward_scoring : prop_kind::forward_training; - bool relu = this->layer_param_.convolution_param().relu(); - Dtype negative_slope; - if(relu) - { - propagation = prop_kind::forward_inference; - negative_slope = this->layer_param_.relu_param().negative_slope(); - } - - int32_t g = std::max(this->group_, 1); - int32_t n = this->num_; - int32_t iw = this->width_; - int32_t ih = this->height_; - int32_t ic = this->channels_; - - int32_t ow = this->width_out_; - int32_t oh = this->height_out_; - int32_t oc = this->num_output_; - - int32_t kw = this->kernel_w_; - int32_t kh = this->kernel_h_; - - memory::dims convolutionStrides {this->stride_h_, this->stride_w_}; - memory::dims padding {this->pad_h_, this->pad_w_}; - - // ---- Initialize memory descriptors (fromat = any) to create convolution descriptor ------------- - memory::data_type mpcsn = memory::data_type::f32; - memory::format mfmt_any = memory::format::any; - - memory::dims bottom_tz = {n, ic, ih, iw}; - memory::dims bias_tz = {oc}; - memory::dims top_tz = {n, oc, oh, ow}; - memory::dims weights_tz = ( g!= 1) ? memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw}; - - // ---- Memory descriptors for initializing of convolution primitive descriptor ------------- - memory::desc init_bottom_md({bottom_tz}, mpcsn, mfmt_any); - memory::desc init_bias_md({bias_tz}, mpcsn, mfmt_any); - memory::desc init_top_md({top_tz}, mpcsn, mfmt_any); - memory::desc init_weights_md({weights_tz}, mpcsn, mfmt_any); - - // ---- Initialize convolution primitive descriptor ------------- - shared_ptr convFwd_desc; - if (this->bias_term_) { - convFwd_desc.reset(new convolution_forward::desc(propagation, algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_bias_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - } else { - convFwd_desc.reset(new convolution_forward::desc(propagation, algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - } - shared_ptr convReluFwd_desc; - if(relu) convReluFwd_desc.reset(new convolution_relu_forward::desc(*convFwd_desc, negative_slope)); - // ---- Determining engine to use ----------------------- - std::string subengines = this->layer_param_.engine(); - if (subengines == "" || subengines == "MKLDNN") - subengines = "MKLDNN:CPU"; - EngineParser ep(subengines); - unsigned subEngineIndex = 0; - shared_ptr convReluFwd_pd; - for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { - try { - convFwd_pd.reset(new convolution_forward::primitive_desc(*convFwd_desc, - ep.getMKLDNNSubEngine(subEngineIndex))); - if(relu) convReluFwd_pd.reset(new convolution_relu_forward::primitive_desc(*convReluFwd_desc, - ep.getMKLDNNSubEngine(subEngineIndex))); - } - catch(...) { - continue; - } - break; - } - - CHECK(convFwd_pd); - engine cpu_engine = CpuEngine::Instance().get_engine(); - - // ---- Create priv memory primitive descriptors stored as class members ------------- - typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc - - shared_ptr prv_fwd_bottom_data_memory_pd(new MemPD(convFwd_pd->src_primitive_desc())); - shared_ptr prv_fwd_top_data_memory_pd(new MemPD(convFwd_pd->dst_primitive_desc())); - shared_ptr prv_fwd_weights_data_memory_pd(new MemPD(convFwd_pd->weights_primitive_desc())); - - // ---- Create usr memory primitive descriptors ------------- - memory::format mfmt_nchw = memory::format::nchw; - memory::format weights_mfmt = ( g!= 1) ? memory::format::goihw : memory::format::oihw; - - // TODO: There should not be a problem to use this for Backward as well - shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); - shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); - - - // --- init primitive and prv_memory descriptors ---------------------- - fwd_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_fwd_bottom_data_memory_pd, bottom[0], this)); - fwd_bottom_data ->name = "fwd_bottom_data @ " + this->layer_param_.name(); - fwd_bottom_data_primitive = fwd_bottom_data->create_input(false); - - fwd_top_data.reset(new MKLDNNData(usr_top_data_memory_pd, prv_fwd_top_data_memory_pd, top[0], this)); - fwd_top_data ->name = "fwd_top_data @ " + this->layer_param_.name(); - fwd_top_data_memory = fwd_top_data->create_output_memory(); - - fwd_weights_data.reset(new MKLDNNData(usr_weights_data_memory_pd, prv_fwd_weights_data_memory_pd, this->blobs_[0].get(), this)); - fwd_weights_data->name = "fwd_weights_data @ " + this->layer_param_.name(); - fwd_weights_data_primitive = fwd_weights_data->create_input(true); - - if (this->bias_term_) { - shared_ptr prv_fwd_bias_data_memory_pd(new MemPD(convFwd_pd->bias_primitive_desc())); - fwd_bias_data.reset(new MKLDNNData(usr_bias_data_memory_pd, prv_fwd_bias_data_memory_pd, this->blobs_[1].get(), this)); - fwd_bias_data->name = "fwd_bias_data @ " + this->layer_param_.name(); - fwd_bias_data_primitive = fwd_bias_data->create_input(true); - if(relu) { - convFwd.reset(new convolution_relu_forward(*convReluFwd_pd - , *fwd_bottom_data_primitive, *fwd_weights_data_primitive - , *fwd_bias_data_primitive, *fwd_top_data_memory)); - } else { - convFwd.reset(new convolution_forward(*convFwd_pd - , *fwd_bottom_data_primitive, *fwd_weights_data_primitive - , *fwd_bias_data_primitive, *fwd_top_data_memory)); +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef MKLDNN_SUPPORTED +#include +#include +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/mkldnn_layers.hpp" +//#include "mkl_service.h" + +// TODO: Correct process case if there are no bias +// TODO: Exception handling - mkl-dnn produces exceptions on errors + +namespace caffe { + +template +MKLDNNConvolutionLayer::MKLDNNConvolutionLayer(const LayerParameter& param) + : MKLDNNLayer(), ConvolutionLayer(param) + , fwd_bottom_data(NULL), fwd_top_data(NULL), fwd_weights_data(NULL), fwd_bias_data(NULL) + , bwdd_weights_data(NULL), bwdw_bottom_data(NULL) + , bwdd_bottom_diff(NULL), bwdd_top_diff(NULL) + , bwdw_top_diff(NULL), bwdw_weights_diff(NULL), bwdw_bias_diff(NULL) + , convFwd_pd(NULL), convBwdData_pd(NULL), convBwdWeights_pd(NULL) + , fwd_top_data_memory(NULL), bwdd_bottom_diff_memory(NULL) + , bwdw_weights_diff_memory(NULL), bwdw_bias_diff_memory(NULL) + , fwd_bottom_data_primitive(NULL), fwd_weights_data_primitive(NULL), fwd_bias_data_primitive(NULL) + , bwdd_top_diff_primitive(NULL), bwdd_weights_data_primitive(NULL) + , bwdw_top_diff_primitive(NULL), bwdw_bottom_data_primitive(NULL) + , width_(0), height_(0), width_out_(0), height_out_(0), kernel_w_(0), kernel_h_(0) + , stride_w_(0), stride_h_(0), pad_w_(0), pad_h_(0) +{ + PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_weights_); +} + +template +void MKLDNNConvolutionLayer::compute_output_shape() +{ + ConvolutionLayer::compute_output_shape(); + this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_) + / this->stride_h_ + 1; + this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_) + / this->stride_w_ + 1; +} + +template +void MKLDNNConvolutionLayer::init_properties(const vector*>& bottom + , const vector*>& top) +{ + this->stride_w_ = this->stride_.cpu_data()[1]; + this->stride_h_ = this->stride_.cpu_data()[0]; + this->width_ = bottom[0]->width(); + this->height_ = bottom[0]->height(); + this->pad_w_ = this->pad_.cpu_data()[1]; + this->pad_h_ = this->pad_.cpu_data()[0]; + this->kernel_w_ = this->kernel_shape_.cpu_data()[1]; + this->kernel_h_ = this->kernel_shape_.cpu_data()[0]; + +#ifdef USE_MLSL + if ((this->layerOp == nullptr) && (this->phase_ == TRAIN)) { + mn::OpRegInfo reg_info{ mn::train::get_session(), MLSL::OT_CC }; + reg_info.set_name(this->layer_param_.name()); + reg_info.add_parameter_set(this->channels_ * this->num_output_ / std::max(this->group_, 1), this->kernel_w_ * this->kernel_h_); + if (this->bias_term_) { + reg_info.add_parameter_set(this->num_output_, 1); + } + this->layerOp = mn::train::add_operation(reg_info); + } +#endif /* USE_MLSL */ +} + +template +void MKLDNNConvolutionLayer::LayerSetUp(const vector*>& bottom + , const vector*>& top) +{ + VLOG(1) << "<< MKLDNNConvolutionLayer::LayerSetUp: " << this->layer_param_.name(); + ConvolutionLayer::LayerSetUp(bottom, top); + init_properties(bottom, top); + this->bottom_shape_ = &bottom[0]->shape(); +} + +template +void MKLDNNConvolutionLayer::Reshape(const vector*>& bottom + , const vector*>& top) +{ + VLOG(1) << " MKLDNNConvolutionLayer::Reshape: " << this->layer_param_.name(); + BaseConvolutionLayer::ReshapeForMKL(bottom, top); + init_properties(bottom, top); +} + +template +void MKLDNNConvolutionLayer::InitConvolutionFwd(const vector*>& bottom + , const vector*>& top) +{ + if (std::is_same::value) NOT_IMPLEMENTED; + auto propagation = this->phase_ == TEST ? prop_kind::forward_scoring : prop_kind::forward_training; + bool relu = this->layer_param_.convolution_param().relu(); + Dtype negative_slope = 0; + if(relu) + { + propagation = prop_kind::forward_inference; + negative_slope = this->layer_param_.relu_param().negative_slope(); + } + + int32_t g = std::max(this->group_, 1); + int32_t n = this->num_; + int32_t iw = this->width_; + int32_t ih = this->height_; + int32_t ic = this->channels_; + + int32_t ow = this->width_out_; + int32_t oh = this->height_out_; + int32_t oc = this->num_output_; + + int32_t kw = this->kernel_w_; + int32_t kh = this->kernel_h_; + + memory::dims convolutionStrides {this->stride_h_, this->stride_w_}; + memory::dims padding {this->pad_h_, this->pad_w_}; + + // ---- Initialize memory descriptors (fromat = any) to create convolution descriptor ------------- + memory::data_type mpcsn = memory::data_type::f32; + memory::format mfmt_any = memory::format::any; + + memory::dims bottom_tz = {n, ic, ih, iw}; + memory::dims bias_tz = {oc}; + memory::dims top_tz = {n, oc, oh, ow}; + memory::dims weights_tz = (g!= 1) ? memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw}; + + // ---- Memory descriptors for initializing of convolution primitive descriptor ------------- + memory::desc init_bottom_md({bottom_tz}, mpcsn, mfmt_any); + memory::desc init_bias_md({bias_tz}, mpcsn, mfmt_any); + memory::desc init_top_md({top_tz}, mpcsn, mfmt_any); + memory::desc init_weights_md({weights_tz}, mpcsn, mfmt_any); + + // ---- Initialize convolution primitive descriptor ------------- + shared_ptr convFwd_desc; + if (this->bias_term_) { + convFwd_desc.reset(new convolution_forward::desc(propagation, algorithm::convolution_direct + , init_bottom_md, init_weights_md, init_bias_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + } else { + convFwd_desc.reset(new convolution_forward::desc(propagation, algorithm::convolution_direct + , init_bottom_md, init_weights_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + } + shared_ptr convReluFwd_desc; + if(relu) convReluFwd_desc.reset(new convolution_relu_forward::desc(*convFwd_desc, negative_slope)); + // ---- Determining engine to use ----------------------- + std::string subengines = this->layer_param_.engine(); + if (subengines == "" || subengines == "MKLDNN") + subengines = "MKLDNN:CPU"; + EngineParser ep(subengines); + unsigned subEngineIndex = 0; + shared_ptr convReluFwd_pd; + for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { + try { + convFwd_pd.reset(new convolution_forward::primitive_desc(*convFwd_desc, + ep.getMKLDNNSubEngine(subEngineIndex))); + if(relu) convReluFwd_pd.reset(new convolution_relu_forward::primitive_desc(*convReluFwd_desc, + ep.getMKLDNNSubEngine(subEngineIndex))); + } + catch(...) { + continue; + } + break; + } + + CHECK(convFwd_pd); + engine cpu_engine = CpuEngine::Instance().get_engine(); + + // ---- Create priv memory primitive descriptors stored as class members ------------- + typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc + + shared_ptr prv_fwd_bottom_data_memory_pd(new MemPD(convFwd_pd->src_primitive_desc())); + shared_ptr prv_fwd_top_data_memory_pd(new MemPD(convFwd_pd->dst_primitive_desc())); + shared_ptr prv_fwd_weights_data_memory_pd(new MemPD(convFwd_pd->weights_primitive_desc())); + + // ---- Create usr memory primitive descriptors ------------- + memory::format mfmt_nchw = memory::format::nchw; + memory::format weights_mfmt = (g!= 1) ? memory::format::goihw : memory::format::oihw; + + // TODO: There should not be a problem to use this for Backward as well + shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); + shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); + + + // --- init primitive and prv_memory descriptors ---------------------- + fwd_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_fwd_bottom_data_memory_pd, bottom[0], this)); + fwd_bottom_data ->name = "fwd_bottom_data @ " + this->layer_param_.name(); + fwd_bottom_data_primitive = fwd_bottom_data->create_input(false); + + fwd_top_data.reset(new MKLDNNData(usr_top_data_memory_pd, prv_fwd_top_data_memory_pd, top[0], this)); + fwd_top_data ->name = "fwd_top_data @ " + this->layer_param_.name(); + fwd_top_data_memory = fwd_top_data->create_output_memory(); + + fwd_weights_data.reset(new MKLDNNData(usr_weights_data_memory_pd, prv_fwd_weights_data_memory_pd, this->blobs_[0].get(), this)); + fwd_weights_data->name = "fwd_weights_data @ " + this->layer_param_.name(); + fwd_weights_data_primitive = fwd_weights_data->create_input(true); + + if (this->bias_term_) { + shared_ptr prv_fwd_bias_data_memory_pd(new MemPD(convFwd_pd->bias_primitive_desc())); + fwd_bias_data.reset(new MKLDNNData(usr_bias_data_memory_pd, prv_fwd_bias_data_memory_pd, this->blobs_[1].get(), this)); + fwd_bias_data->name = "fwd_bias_data @ " + this->layer_param_.name(); + fwd_bias_data_primitive = fwd_bias_data->create_input(true); + if(relu) { + convFwd.reset(new convolution_relu_forward(*convReluFwd_pd + , *fwd_bottom_data_primitive, *fwd_weights_data_primitive + , *fwd_bias_data_primitive, *fwd_top_data_memory)); + } else { + convFwd.reset(new convolution_forward(*convFwd_pd + , *fwd_bottom_data_primitive, *fwd_weights_data_primitive + , *fwd_bias_data_primitive, *fwd_top_data_memory)); + } + fwd_bias_data->set_mkldnn_primitive(convFwd); + } else { + if(relu) { + convFwd.reset(new convolution_relu_forward(*convReluFwd_pd + , *fwd_bottom_data_primitive, *fwd_weights_data_primitive + , *fwd_top_data_memory)); + } else { + convFwd.reset(new convolution_forward(*convFwd_pd + , *fwd_bottom_data_primitive, *fwd_weights_data_primitive + , *fwd_top_data_memory)); + } + } + fwd_bottom_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + //fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + + fwd_top_data->set_mkldnn_primitive(convFwd); + + //fwd_weights_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); + fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); + + // Names are for debugging purposes only. +} + +template +void MKLDNNConvolutionLayer::Forward_cpu(const vector*>& bottom + , const vector*>& top) +{ + VLOG(1) << "MKLDNNConvolutionLayer::Forward_cpu: " << this->layer_param_.name(); + if( convFwd_pd == NULL) + InitConvolutionFwd(bottom, top); + // making reorders if needed. + fwd_bottom_data->sync_before_read(); + fwd_weights_data->sync_before_read(); + if (this->bias_term_) + fwd_bias_data->sync_before_read(); + // update top that head at prv + fwd_top_data->sync_before_write(); + + PERFORMANCE_EVENT_ID_INIT(perf_id_fw_, PERFORMANCE_MKLDNN_NAME("FW")); + PERFORMANCE_MEASUREMENT_BEGIN(); + convFwd.submit(); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_fw_); +} + + +template +void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector*>& top + , const vector& propagate_down + , const vector*>& bottom) +{ + if (std::is_same::value) NOT_IMPLEMENTED; + + int32_t g = std::max(this->group_, 1); + int32_t n = this->num_; + int32_t iw = this->width_; + int32_t ih = this->height_; + int32_t ic = this->channels_; + + int32_t ow = this->width_out_; + int32_t oh = this->height_out_; + int32_t oc = this->num_output_; + + int32_t kw = this->kernel_w_; + int32_t kh = this->kernel_h_; + + memory::dims convolutionStrides {this->stride_h_, this->stride_w_}; + memory::dims padding {this->pad_h_, this->pad_w_}; + + // ---- Initialize memory descriptors (fromat = any) to create convolution descriptor ------------- + memory::data_type mpcsn = memory::data_type::f32; + memory::format mfmt_any = memory::format::any; + + memory::dims bottom_tz = {n, ic, ih, iw}; + memory::dims bias_tz = {oc}; + memory::dims top_tz = {n, oc, oh, ow}; + memory::dims weights_tz = ( g!= 1) ? memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw}; + + // ---- Memory descriptors for initializing of convolution primitive descriptor ------------- + memory::desc init_bottom_md({bottom_tz}, mpcsn, mfmt_any); + memory::desc init_bias_md({bias_tz}, mpcsn, mfmt_any); + memory::desc init_top_md({top_tz}, mpcsn, mfmt_any); + memory::desc init_weights_md({weights_tz}, mpcsn, mfmt_any); + + // ---- Initialize convolution primitive descriptor ------------- + shared_ptr convBwdData_desc; + shared_ptr convBwdWeights_desc; + if (this->bias_term_) { + convBwdWeights_desc.reset(new convolution_backward_weights::desc(algorithm::convolution_direct + , init_bottom_md, init_weights_md, init_bias_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + } else { + convBwdWeights_desc.reset(new convolution_backward_weights::desc(algorithm::convolution_direct + , init_bottom_md, init_weights_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + } + + convBwdData_desc.reset(new convolution_backward_data::desc(algorithm::convolution_direct + , init_bottom_md, init_weights_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + + // ---- Determining engine to use ----------------------- + std::string subengines = this->layer_param_.engine(); + if (subengines == "" || subengines == "MKLDNN") + subengines = "MKLDNN:CPU"; + EngineParser ep(subengines); + unsigned subEngineIndex = 0; + for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { + try { + convBwdData_pd.reset(new convolution_backward_data::primitive_desc(*convBwdData_desc, + ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); + + convBwdWeights_pd.reset(new convolution_backward_weights::primitive_desc(*convBwdWeights_desc, + ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); + } + catch(...) { + continue; + } + break; + } + CHECK(convBwdData_pd); + CHECK(convBwdWeights_pd); + engine cpu_engine = CpuEngine::Instance().get_engine(); + + // ---- Create priv memory primitive descriptors stored as class members ------------- + typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc + + shared_ptr prv_bwdd_bottom_diff_memory_pd(new MemPD(convBwdData_pd->diff_src_primitive_desc())); + shared_ptr prv_bwdd_top_diff_memory_pd(new MemPD(convBwdData_pd->diff_dst_primitive_desc())); + shared_ptr prv_bwdd_weights_data_memory_pd(new MemPD(convBwdData_pd->weights_primitive_desc())); + + shared_ptr prv_bwdw_bottom_data_memory_pd(new MemPD(convBwdWeights_pd->src_primitive_desc())); + shared_ptr prv_bwdw_top_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_dst_primitive_desc())); + shared_ptr prv_bwdw_weights_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_weights_primitive_desc())); + + // ---- Create usr memory primitive descriptors ------------- + memory::format mfmt_nchw = memory::format::nchw; + memory::format weights_mfmt = ( g!= 1) ? memory::format::goihw : memory::format::oihw; + + // ???!!! can we use usr memory primitive descrittors for backward?? + shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); + shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); + + + // --- init primitive and prv_memory descriptors ---------------------- + bwdd_bottom_diff.reset(new MKLDNNDiff(usr_bottom_data_memory_pd, prv_bwdd_bottom_diff_memory_pd, bottom[0], this)); + bwdd_bottom_diff ->name = "bwdd_bottom_diff @ " + this->layer_param_.name(); + bwdd_bottom_diff_memory = bwdd_bottom_diff->create_output_memory(); + bwdw_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_bwdw_bottom_data_memory_pd, bottom[0], this)); + bwdw_bottom_data ->name = "bwdw_bottom_data @ " + this->layer_param_.name(); + bwdw_bottom_data_primitive = bwdw_bottom_data->create_input(false); + + bwdd_top_diff.reset(new MKLDNNDiff(usr_top_data_memory_pd, prv_bwdd_top_diff_memory_pd, top[0], this)); + bwdd_top_diff ->name = "bwdd_top_diff @ " + this->layer_param_.name(); + bwdd_top_diff_primitive = bwdd_top_diff->create_input(false); + bwdw_top_diff.reset(new MKLDNNDiff(usr_top_data_memory_pd, prv_bwdw_top_diff_memory_pd, top[0], this)); + bwdw_top_diff ->name = "bwdw_top_diff @ " + this->layer_param_.name(); + bwdw_top_diff_primitive = bwdw_top_diff->create_input(false); + + bwdd_weights_data.reset(new MKLDNNData(usr_weights_data_memory_pd, prv_bwdd_weights_data_memory_pd, this->blobs_[0].get(), this)); + bwdd_weights_data->name = "bwdd_weights_data @ " + this->layer_param_.name(); + bwdd_weights_data_primitive = bwdd_weights_data->create_input(false); + bwdw_weights_diff.reset(new MKLDNNDiff(usr_weights_data_memory_pd, prv_bwdw_weights_diff_memory_pd, this->blobs_[0].get(), this)); + bwdw_weights_diff->name = "bwdw_weights_diff @ " + this->layer_param_.name(); + bwdw_weights_diff_memory = bwdw_weights_diff->create_output_memory(); + + if (this->bias_term_) { + shared_ptr prv_bwdw_bias_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_bias_primitive_desc())); + bwdw_bias_diff.reset(new MKLDNNDiff(usr_bias_data_memory_pd, prv_bwdw_bias_diff_memory_pd, this->blobs_[1].get(), this)); + bwdw_bias_diff->name = "bwdw_bias_diff @ " + this->layer_param_.name(); + bwdw_bias_diff_memory = bwdw_bias_diff->create_output_memory(); + + convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd + , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive + , *bwdw_weights_diff_memory, *bwdw_bias_diff_memory)); + + bwdw_bias_diff->set_mkldnn_primitive(convBwdWeights); + } else { + convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd + , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive + , *bwdw_weights_diff_memory)); + } + + convBwdData.reset(new convolution_backward_data(*convBwdData_pd + , *bwdd_top_diff_primitive, *bwdd_weights_data_primitive + , *bwdd_bottom_diff_memory)); + + bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); + + bwdd_top_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); + //bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); + + //bwdd_weights_data->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); + bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); + + + bwdw_bottom_data->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); + //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); + + bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); + //bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); + + bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); + + // Names are for debugging purposes only. +} + + +template +void MKLDNNConvolutionLayer::Backward_cpu(const vector*>& top + , const vector& propagate_down + , const vector*>& bottom) +{ + VLOG(1) << "MKLDNNConvolutionLayer::Backward_cpu: " << this->layer_param_.name(); + if( convBwdData_pd == NULL) + InitConvolutionBwd(top, propagate_down, bottom); + if (propagate_down[0]) { + // making reorders if needed. + bwdd_top_diff->sync_before_read(); + bwdd_weights_data->sync_before_read(); + bwdd_bottom_diff->sync_before_write(); + + PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW")); + PERFORMANCE_MEASUREMENT_BEGIN(); +#ifdef DEBUG + if (bottom[0]->prv_data() != NULL) + { + LOG(INFO) << "Debug: Bottom prv data: " << *bottom[0]->prv_data(); } - fwd_bias_data->set_mkldnn_primitive(convFwd); - } else { - if(relu) { - convFwd.reset(new convolution_relu_forward(*convReluFwd_pd - , *fwd_bottom_data_primitive, *fwd_weights_data_primitive - , *fwd_top_data_memory)); - } else { - convFwd.reset(new convolution_forward(*convFwd_pd - , *fwd_bottom_data_primitive, *fwd_weights_data_primitive - , *fwd_top_data_memory)); + else + { + LOG(INFO) << "Debug: Bottom prv data is NULL!"; + //LOG(INFO) << "Debug: Bottom cpu data: " << *bottom[0]->cpu_data(); } - } - fwd_bottom_data->set_mkldnn_primitive(convFwd); - fwd_top_data->set_mkldnn_primitive(convFwd); - fwd_weights_data->set_mkldnn_primitive(convFwd); - - // Names are for debugging purposes only. -} - -template -void MKLDNNConvolutionLayer::Forward_cpu(const vector*>& bottom - , const vector*>& top) -{ - VLOG(1) << "MKLDNNConvolutionLayer::Forward_cpu: " << this->layer_param_.name(); - if( convFwd_pd == NULL) - InitConvolutionFwd(bottom, top); - // making reorders if needed. - fwd_bottom_data->sync_before_read(); - fwd_weights_data->sync_before_read(); - if (this->bias_term_) - fwd_bias_data->sync_before_read(); - // update top that head at prv - fwd_top_data->sync_before_write(); - - PERFORMANCE_EVENT_ID_INIT(perf_id_fw_, PERFORMANCE_MKLDNN_NAME("FW")); - PERFORMANCE_MEASUREMENT_BEGIN(); - convFwd.submit(); - PERFORMANCE_MEASUREMENT_END_ID(perf_id_fw_); -} - -template -void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector*>& top - , const vector& propagate_down - , const vector*>& bottom) -{ - if (std::is_same::value) NOT_IMPLEMENTED; - - int32_t g = std::max(this->group_, 1); - int32_t n = this->num_; - int32_t iw = this->width_; - int32_t ih = this->height_; - int32_t ic = this->channels_; - - int32_t ow = this->width_out_; - int32_t oh = this->height_out_; - int32_t oc = this->num_output_; - - int32_t kw = this->kernel_w_; - int32_t kh = this->kernel_h_; - - memory::dims convolutionStrides {this->stride_h_, this->stride_w_}; - memory::dims padding {this->pad_h_, this->pad_w_}; - - // ---- Initialize memory descriptors (fromat = any) to create convolution descriptor ------------- - memory::data_type mpcsn = memory::data_type::f32; - memory::format mfmt_any = memory::format::any; - - memory::dims bottom_tz = {n, ic, ih, iw}; - memory::dims bias_tz = {oc}; - memory::dims top_tz = {n, oc, oh, ow}; - memory::dims weights_tz = ( g!= 1) ? memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw}; - - // ---- Memory descriptors for initializing of convolution primitive descriptor ------------- - memory::desc init_bottom_md({bottom_tz}, mpcsn, mfmt_any); - memory::desc init_bias_md({bias_tz}, mpcsn, mfmt_any); - memory::desc init_top_md({top_tz}, mpcsn, mfmt_any); - memory::desc init_weights_md({weights_tz}, mpcsn, mfmt_any); - - // ---- Initialize convolution primitive descriptor ------------- - shared_ptr convBwdData_desc; - shared_ptr convBwdWeights_desc; - if (this->bias_term_) { - convBwdWeights_desc.reset(new convolution_backward_weights::desc(algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_bias_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - } else { - convBwdWeights_desc.reset(new convolution_backward_weights::desc(algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - } - - convBwdData_desc.reset(new convolution_backward_data::desc(algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - - // ---- Determining engine to use ----------------------- - std::string subengines = this->layer_param_.engine(); - if (subengines == "" || subengines == "MKLDNN") - subengines = "MKLDNN:CPU"; - EngineParser ep(subengines); - unsigned subEngineIndex = 0; - for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { - try { - convBwdData_pd.reset(new convolution_backward_data::primitive_desc(*convBwdData_desc, - ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); - - convBwdWeights_pd.reset(new convolution_backward_weights::primitive_desc(*convBwdWeights_desc, - ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); - } - catch(...) { - continue; - } - break; - } - CHECK(convBwdData_pd); - CHECK(convBwdWeights_pd); - engine cpu_engine = CpuEngine::Instance().get_engine(); - - // ---- Create priv memory primitive descriptors stored as class members ------------- - typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc - - shared_ptr prv_bwdd_bottom_diff_memory_pd(new MemPD(convBwdData_pd->diff_src_primitive_desc())); - shared_ptr prv_bwdd_top_diff_memory_pd(new MemPD(convBwdData_pd->diff_dst_primitive_desc())); - shared_ptr prv_bwdd_weights_data_memory_pd(new MemPD(convBwdData_pd->weights_primitive_desc())); - - shared_ptr prv_bwdw_bottom_data_memory_pd(new MemPD(convBwdWeights_pd->src_primitive_desc())); - shared_ptr prv_bwdw_top_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_dst_primitive_desc())); - shared_ptr prv_bwdw_weights_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_weights_primitive_desc())); - - // ---- Create usr memory primitive descriptors ------------- - memory::format mfmt_nchw = memory::format::nchw; - memory::format weights_mfmt = ( g!= 1) ? memory::format::goihw : memory::format::oihw; - - // ???!!! can we use usr memory primitive descrittors for backward?? - shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); - shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); - - - // --- init primitive and prv_memory descriptors ---------------------- - bwdd_bottom_diff.reset(new MKLDNNDiff(usr_bottom_data_memory_pd, prv_bwdd_bottom_diff_memory_pd, bottom[0], this)); - bwdd_bottom_diff ->name = "bwdd_bottom_diff @ " + this->layer_param_.name(); - bwdd_bottom_diff_memory = bwdd_bottom_diff->create_output_memory(); - bwdw_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_bwdw_bottom_data_memory_pd, bottom[0], this)); - bwdw_bottom_data ->name = "bwdw_bottom_data @ " + this->layer_param_.name(); - bwdw_bottom_data_primitive = bwdw_bottom_data->create_input(false); - - bwdd_top_diff.reset(new MKLDNNDiff(usr_top_data_memory_pd, prv_bwdd_top_diff_memory_pd, top[0], this)); - bwdd_top_diff ->name = "bwdd_top_diff @ " + this->layer_param_.name(); - bwdd_top_diff_primitive = bwdd_top_diff->create_input(false); - bwdw_top_diff.reset(new MKLDNNDiff(usr_top_data_memory_pd, prv_bwdw_top_diff_memory_pd, top[0], this)); - bwdw_top_diff ->name = "bwdw_top_diff @ " + this->layer_param_.name(); - bwdw_top_diff_primitive = bwdw_top_diff->create_input(false); - - bwdd_weights_data.reset(new MKLDNNData(usr_weights_data_memory_pd, prv_bwdd_weights_data_memory_pd, this->blobs_[0].get(), this)); - bwdd_weights_data->name = "bwdd_weights_data @ " + this->layer_param_.name(); - bwdd_weights_data_primitive = bwdd_weights_data->create_input(false); - bwdw_weights_diff.reset(new MKLDNNDiff(usr_weights_data_memory_pd, prv_bwdw_weights_diff_memory_pd, this->blobs_[0].get(), this)); - bwdw_weights_diff->name = "bwdw_weights_diff @ " + this->layer_param_.name(); - bwdw_weights_diff_memory = bwdw_weights_diff->create_output_memory(); - - if (this->bias_term_) { - shared_ptr prv_bwdw_bias_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_bias_primitive_desc())); - bwdw_bias_diff.reset(new MKLDNNDiff(usr_bias_data_memory_pd, prv_bwdw_bias_diff_memory_pd, this->blobs_[1].get(), this)); - bwdw_bias_diff->name = "bwdw_bias_diff @ " + this->layer_param_.name(); - bwdw_bias_diff_memory = bwdw_bias_diff->create_output_memory(); - - convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd - , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive - , *bwdw_weights_diff_memory, *bwdw_bias_diff_memory)); - - bwdw_bias_diff->set_mkldnn_primitive(convBwdWeights); - } else { - convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd - , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive - , *bwdw_weights_diff_memory)); - } - - convBwdData.reset(new convolution_backward_data(*convBwdData_pd - , *bwdd_top_diff_primitive, *bwdd_weights_data_primitive - , *bwdd_bottom_diff_memory)); - - bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); - bwdd_top_diff->set_mkldnn_primitive(convBwdData); - bwdd_weights_data->set_mkldnn_primitive(convBwdData); - - bwdw_bottom_data->set_mkldnn_primitive(convBwdWeights); - bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); - bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); - - // Names are for debugging purposes only. -} - - -template -void MKLDNNConvolutionLayer::Backward_cpu(const vector*>& top - , const vector& propagate_down - , const vector*>& bottom) -{ - VLOG(1) << "MKLDNNConvolutionLayer::Backward_cpu: " << this->layer_param_.name(); - if( convBwdData_pd == NULL) - InitConvolutionBwd(top, propagate_down, bottom); - if (propagate_down[0]) { - // making reorders if needed. - bwdd_top_diff->sync_before_read(); - bwdd_weights_data->sync_before_read(); - bwdd_bottom_diff->sync_before_write(); - - PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW")); - PERFORMANCE_MEASUREMENT_BEGIN(); - convBwdData.submit(); - PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); - } - if (this->param_propagate_down(0)) { - // making reorders if needed. - bwdw_top_diff->sync_before_read(); - bwdw_bottom_data->sync_before_read(); - // update top that head at prv - bwdw_weights_diff->sync_before_write(); - if (this->param_propagate_down(1)) { - CHECK(bwdw_bias_diff); - bwdw_bias_diff->sync_before_write(); + if (top[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Top prv diff: " << *top[0]->prv_diff(); } - PERFORMANCE_EVENT_ID_INIT(perf_id_bw_weights_, - PERFORMANCE_MKLDNN_NAME_DETAILED("BW", "_weights")); - PERFORMANCE_MEASUREMENT_BEGIN(); - convBwdWeights.submit(); - PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_weights_); - } -} - -#ifdef CPU_ONLY -STUB_GPU(MKLDNNConvolutionLayer); -#else - -template -void MKLDNNConvolutionLayer::Forward_gpu(const vector*>& bottom - , const vector*>& top) -{ - NOT_IMPLEMENTED; -} - -template -void MKLDNNConvolutionLayer::Backward_gpu(const vector*>& top - , const vector& propagate_down - , const vector*>& bottom) -{ - NOT_IMPLEMENTED; -} -#endif - -INSTANTIATE_CLASS(MKLDNNConvolutionLayer); - -} // namespace caffe -#endif // #ifdef MKLDNN_SUPPORTED + else + { + LOG(INFO) << "Debug: Top prv diff is NULL!"; + LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); + } + + if (this->blobs_[0]->prv_data() != NULL) + { + LOG(INFO) << "Debug: Weights prv data from blobs_[0]: " << *this->blobs_[0]->prv_data(); + } + else + { + LOG(INFO) << "Debug: Weights prv data is NULL!"; + LOG(INFO) << "Debug: Weights cpu data: " << *this->blobs_[0]->cpu_data(); + } + //Before submit, so get_prv_ptr() always has the value + LOG(INFO) << "Debug: Weights prv data from get_prv_ptr: " << *bwdd_weights_data->get_prv_ptr(); +#endif + convBwdData.submit(); +#ifdef DEBUG + if (bottom[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Bottom prv diff: " << *bottom[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Bottom prv diff is NULL!"; + LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); + } +#endif + PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); + } + if (this->param_propagate_down(0)) { + // making reorders if needed. + bwdw_top_diff->sync_before_read(); + bwdw_bottom_data->sync_before_read(); + // update top that head at prv + bwdw_weights_diff->sync_before_write(); + if (this->param_propagate_down(1)) { + CHECK(bwdw_bias_diff); + bwdw_bias_diff->sync_before_write(); + } + PERFORMANCE_EVENT_ID_INIT(perf_id_bw_weights_, + PERFORMANCE_MKLDNN_NAME_DETAILED("BW", "_weights")); + PERFORMANCE_MEASUREMENT_BEGIN(); + convBwdWeights.submit(); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_weights_); + } +} + +#ifdef CPU_ONLY +STUB_GPU(MKLDNNConvolutionLayer); +#else + +template +void MKLDNNConvolutionLayer::Forward_gpu(const vector*>& bottom + , const vector*>& top) +{ + NOT_IMPLEMENTED; +} + +template +void MKLDNNConvolutionLayer::Backward_gpu(const vector*>& top + , const vector& propagate_down + , const vector*>& bottom) +{ + NOT_IMPLEMENTED; +} +#endif + +INSTANTIATE_CLASS(MKLDNNConvolutionLayer); + +} // namespace caffe +#endif // #ifdef MKLDNN_SUPPORTED diff --git a/src/caffe/layers/mkldnn_eltwise_layer.cpp b/src/caffe/layers/mkldnn_eltwise_layer.cpp index 96ced307d..2a4a87c79 100644 --- a/src/caffe/layers/mkldnn_eltwise_layer.cpp +++ b/src/caffe/layers/mkldnn_eltwise_layer.cpp @@ -182,16 +182,17 @@ void MKLDNNEltwiseLayer::InitEltwiseFwd(const vector*>& botto } shared_ptr usr_top_data_mpd(new memory::primitive_desc( - {{n, ic, ih, iw}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr prv_top_data_mpd(new memory::primitive_desc({{n, ic, ih, iw}, mpcsn, mfmt_nchw}, cpu_engine)); + {{n, ic, ih, iw}, mpcsn, mfmt_nchw}, cpu_engine)); // ---- Determining engine to use ----------------------- std::string subengines = this->layer_param_.engine(); if (subengines == "" || subengines == "MKLDNN") subengines = "MKLDNN:CPU"; - eltwiseFwd_pd.reset(new sum::primitive_desc({{n, ic, ih, iw}, mpcsn, mfmt_nchw}, scale, bottom_data_mpd)); + eltwiseFwd_pd.reset(new sum::primitive_desc({{n, ic, ih, iw}, mpcsn, memory::format::any}, scale, bottom_data_mpd)); CHECK(eltwiseFwd_pd); + shared_ptr prv_top_data_mpd(new memory::primitive_desc(eltwiseFwd_pd->dst_primitive_desc())); + fwd_top_data.reset(new MKLDNNData(usr_top_data_mpd, prv_top_data_mpd, top[0], this)); fwd_top_data->name = "fwd_top_data @ " + this->layer_param_.name(); fwd_top_data_memory = fwd_top_data->create_output_memory(); diff --git a/src/caffe/layers/mkldnn_inner_product_layer.cpp b/src/caffe/layers/mkldnn_inner_product_layer.cpp index c9ee14b81..d2fe6cfaa 100644 --- a/src/caffe/layers/mkldnn_inner_product_layer.cpp +++ b/src/caffe/layers/mkldnn_inner_product_layer.cpp @@ -127,7 +127,7 @@ void MKLDNNInnerProductLayer::InitInnerProductFwd(const vectorh_; int32_t oc = this->N_; int32_t ic = this->K_/h_/w_; - bool has_spatial = h > 1 || w > 1; + bool has_spatial = (bottom[0]->shape().size() != 2); // Initialize memory descriptors (fromat = any) to create inner_product descriptor memory::data_type mpcsn = memory::data_type::f32; @@ -138,6 +138,20 @@ void MKLDNNInnerProductLayer::InitInnerProductFwd(const vector::InitInnerProductFwd(const vector prv_fwd_bottom_data_memory_pd(new MemPD(ipFwd_pd->src_primitive_desc())); shared_ptr prv_fwd_top_data_memory_pd(new MemPD(ipFwd_pd->dst_primitive_desc())); shared_ptr prv_fwd_weights_data_memory_pd(new MemPD(ipFwd_pd->weights_primitive_desc())); - shared_ptr prv_fwd_bias_data_memory_pd(new MemPD(ipFwd_pd->bias_primitive_desc())); - + // Create usr memory primitive descriptors stored as class members engine cpu_engine = CpuEngine::Instance().get_engine(); memory::format input_mfmt = has_spatial ? memory::format::nchw : memory::format::nc; @@ -189,6 +202,10 @@ void MKLDNNInnerProductLayer::InitInnerProductFwd(const vector usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, memory::format::nc}, cpu_engine)); memory::format weights_mfmt = has_spatial ? memory::format::oihw : memory::format::oi; shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); +#ifdef DEBUG + LOG(INFO) << "Memory format of usr_bottom_data_memory_pd: " << input_mfmt; + LOG(INFO) << "Memory format of usr_weights_data_memory_pd: " << weights_mfmt; +#endif // --- init primitive and prv_memory descriptors ---------------------- fwd_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_fwd_bottom_data_memory_pd, bottom[0], this)); @@ -204,6 +221,7 @@ void MKLDNNInnerProductLayer::InitInnerProductFwd(const vectorcreate_input(true); if (this->bias_term_) { + shared_ptr prv_fwd_bias_data_memory_pd(new MemPD(ipFwd_pd->bias_primitive_desc())); fwd_bias_data.reset(new MKLDNNData(usr_bias_data_memory_pd, prv_fwd_bias_data_memory_pd, this->blobs_[1].get(), this)); fwd_bias_data ->name = "fwd_bias_data @ " + this->layer_param_.name(); fwd_bias_data_primitive = fwd_bias_data->create_input(true); @@ -215,10 +233,20 @@ void MKLDNNInnerProductLayer::InitInnerProductFwd(const vectorset_mkldnn_primitive(ipFwd); + + //Because the inputs of inner product layer always come from user memory, so will not trigger the wrong reorder from extprv to prv + fwd_bottom_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + //fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + fwd_top_data->set_mkldnn_primitive(ipFwd); - fwd_weights_data->set_mkldnn_primitive(ipFwd); - fwd_bias_data->set_mkldnn_primitive(ipFwd); + + fwd_weights_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); + //fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); + + if (this->bias_term_) + fwd_bias_data->set_mkldnn_primitive(ipFwd); } template @@ -226,12 +254,17 @@ void MKLDNNInnerProductLayer::Forward_cpu(const vector*>& bot , const vector*>& top) { VLOG(1) << "MKLDNNInnerProductLayer::Forward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNInnerProductLayer::Forward_cpu: " << this->layer_param_.name(); +#endif + if( ipFwd_pd == NULL) InitInnerProductFwd(bottom, top); // making reorders if needed. fwd_bottom_data->sync_before_read(); fwd_weights_data->sync_before_read(); - fwd_bias_data->sync_before_read(); + if (this->bias_term_) + fwd_bias_data->sync_before_read(); // update top that head at prv fwd_top_data->sync_before_write(); @@ -253,7 +286,7 @@ void MKLDNNInnerProductLayer::InitInnerProductBwd(const vectorh_; int32_t oc = this->N_; int32_t ic = this->K_/h_/w_; - bool has_spatial = h > 1 || w > 1; + bool has_spatial = (bottom[0]->shape().size() != 2); // Initialize memory descriptors (format = any) to create inner_product descriptor memory::data_type mpcsn = memory::data_type::f32; @@ -264,6 +297,20 @@ void MKLDNNInnerProductLayer::InitInnerProductBwd(const vector::InitInnerProductBwd(const vector ipBwdData_desc; shared_ptr ipBwdWeights_desc; - + if (this->bias_term_) ipBwdWeights_desc.reset(new inner_product_backward_weights::desc(init_bottom_md, init_weights_md , init_bias_md, init_top_md)); + else + ipBwdWeights_desc.reset(new inner_product_backward_weights::desc(init_bottom_md, init_weights_md + , init_top_md)); + ipBwdData_desc.reset(new inner_product_backward_data::desc(init_bottom_md, init_weights_md, init_top_md)); // ---- Determining engine to use ----------------------- @@ -310,16 +361,19 @@ void MKLDNNInnerProductLayer::InitInnerProductBwd(const vector prv_bwdw_bottom_data_memory_pd(new MemPD(ipBwdWeights_pd->src_primitive_desc())); shared_ptr prv_bwdw_top_diff_memory_pd(new MemPD(ipBwdWeights_pd->diff_dst_primitive_desc())); shared_ptr prv_bwdw_weights_diff_memory_pd(new MemPD(ipBwdWeights_pd->diff_weights_primitive_desc())); - shared_ptr prv_bwdw_bias_diff_memory_pd(new MemPD(ipBwdWeights_pd->diff_bias_primitive_desc())); // Create usr memory primitive descriptors stored as class members engine cpu_engine = CpuEngine::Instance().get_engine(); - memory::format input_mfmt = has_spatial ? memory::format::nchw : memory::format::nc; + memory::format input_mfmt = has_spatial ? memory::format::nchw : memory::format::nc; shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, input_mfmt}, cpu_engine)); shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, memory::format::nc}, cpu_engine)); memory::format weights_mfmt = has_spatial ? memory::format::oihw : memory::format::oi; shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); +#ifdef DEBUG + LOG(INFO) << "Memory format of usr_bottom_data_memory_pd: " << input_mfmt; + LOG(INFO) << "Memory format of usr_weights_data_memory_pd: " << weights_mfmt; +#endif // --- init primitive and prv_memory descriptors ---------------------- bwdd_bottom_diff.reset(new MKLDNNDiff(usr_bottom_data_memory_pd, prv_bwdd_bottom_diff_memory_pd, bottom[0], this)); @@ -343,8 +397,8 @@ void MKLDNNInnerProductLayer::InitInnerProductBwd(const vectorname = "bwdw_weights_diff @ " + this->layer_param_.name(); bwdw_weights_diff_memory = bwdw_weights_diff->create_output_memory(); - if (this->bias_term_) { + shared_ptr prv_bwdw_bias_diff_memory_pd(new MemPD(ipBwdWeights_pd->diff_bias_primitive_desc())); bwdw_bias_diff.reset(new MKLDNNDiff(usr_bias_data_memory_pd, prv_bwdw_bias_diff_memory_pd, this->blobs_[1].get(), this)); bwdw_bias_diff ->name = "bwdw_bias_diff @ " + this->layer_param_.name(); bwdw_bias_diff_memory = bwdw_bias_diff->create_output_memory(); @@ -363,13 +417,28 @@ void MKLDNNInnerProductLayer::InitInnerProductBwd(const vectorset_mkldnn_primitive(ipBwdData); - bwdd_top_diff->set_mkldnn_primitive(ipBwdData); - bwdd_weights_data->set_mkldnn_primitive(ipBwdData); + + bwdd_top_diff->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); + //bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); + + bwdd_weights_data->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); + //bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); + + + bwdw_bottom_data->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); + //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); + + bwdw_top_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); + //bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); - bwdw_bottom_data->set_mkldnn_primitive(ipBwdWeights); - bwdw_top_diff->set_mkldnn_primitive(ipBwdWeights); bwdw_weights_diff->set_mkldnn_primitive(ipBwdWeights); - bwdw_bias_diff->set_mkldnn_primitive(ipBwdWeights); + + if (this->bias_term_) + bwdw_bias_diff->set_mkldnn_primitive(ipBwdWeights); } @@ -380,6 +449,10 @@ void MKLDNNInnerProductLayer::Backward_cpu(const vector*>& to , const vector*>& bottom) { VLOG(1) << "MKLDNNInnerProductLayer::Backward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNInnerProductLayer::Backward_cpu: " << this->layer_param_.name(); +#endif + if( ipBwdData_pd == NULL) InitInnerProductBwd(top, propagate_down, bottom); if (propagate_down[0]) { @@ -390,7 +463,52 @@ void MKLDNNInnerProductLayer::Backward_cpu(const vector*>& to PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW")); PERFORMANCE_MEASUREMENT_BEGIN(); +#ifdef DEBUG + if (bottom[0]->prv_data() != NULL) + { + LOG(INFO) << "Debug: Bottom prv data: " << *bottom[0]->prv_data(); + } + else + { + LOG(INFO) << "Debug: Bottom prv data is NULL!"; + //LOG(INFO) << "Debug: Bottom cpu data: " << *bottom[0]->cpu_data(); + //Chong: if don't have this LOG print, will cause: this->_cpu_ptr == cpu_ptr crash, without the fix in dropout_layer.cpp + } + + if (top[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Top prv diff: " << *top[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Top prv diff is NULL!"; + LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); + } + + if (this->blobs_[0]->prv_data() != NULL) + { + LOG(INFO) << "Debug: Weights prv data from blobs_[0]: " << *this->blobs_[0]->prv_data(); + } + else + { + LOG(INFO) << "Debug: Weights prv data is NULL!"; + LOG(INFO) << "Debug: Weights cpu data: " << *this->blobs_[0]->cpu_data(); + } + //Before submit, so get_prv_ptr() always has the value + LOG(INFO) << "Debug: Weights prv data from get_prv_ptr: " << *bwdd_weights_data->get_prv_ptr(); +#endif ipBwdData.submit(); +#ifdef DEBUG + if (bottom[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Bottom prv diff: " << *bottom[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Bottom prv diff is NULL!"; + LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); + } +#endif PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); } if (this->param_propagate_down(0)) { diff --git a/src/caffe/layers/mkldnn_lrn_layer.cpp b/src/caffe/layers/mkldnn_lrn_layer.cpp index ae118de7c..48ea4e884 100644 --- a/src/caffe/layers/mkldnn_lrn_layer.cpp +++ b/src/caffe/layers/mkldnn_lrn_layer.cpp @@ -138,7 +138,7 @@ void MKLDNNLRNLayer::InitLRNFwd(const vector*>& bottom, const memory::data_type mpcsn = memory::data_type::f32; // ---- Initialize memory descriptors ------------- memory::dims tz = {n, ic, ih, iw}; - shared_ptr bottom_md, top_md; + shared_ptr top_md; shared_ptr usr_mpd, prv_mpd; if (bottom_data_is_prv) { shared_ptr > mem_descr @@ -266,7 +266,7 @@ void MKLDNNLRNLayer::InitLRNBwd(const vector*>& top bottom_diff_md = top_diff_md; // ---- Initialize LRN primitive descriptor ------------- - lrn_backward::desc lrnBwd_desc(lrn_algorithm, *bottom_diff_md, *top_diff_md, + lrn_backward::desc lrnBwd_desc(lrn_algorithm, *bottom_md, *top_diff_md, size_, alpha_, beta_); // ---- Determining engine to use ----------------------- std::string subengines = this->layer_param_.engine(); diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp index fe1ae7d03..6bce42a1c 100644 --- a/src/caffe/layers/mkldnn_pooling_layer.cpp +++ b/src/caffe/layers/mkldnn_pooling_layer.cpp @@ -289,6 +289,10 @@ void MKLDNNPoolingLayer::Forward_cpu(const vector*>& bottom ,const vector*>& top) { VLOG(1) << "MKLDNNPoolingLayer::Forward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNPoolingLayer::Forward_cpu: " << this->layer_param_.name(); +#endif + if (NULL == poolingFwd_pd) InitPoolingFwd(bottom, top); // making reorders if needed. @@ -422,6 +426,10 @@ void MKLDNNPoolingLayer::Backward_cpu(const vector*>& top , const vector*>& bottom) { VLOG(1) << "MKLDNNPoolingLayer::Backward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNPoolingLayer::Backward_cpu: " << this->layer_param_.name(); +#endif + if (!propagate_down[0]) { return; } @@ -433,7 +441,38 @@ void MKLDNNPoolingLayer::Backward_cpu(const vector*>& top PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW")); PERFORMANCE_MEASUREMENT_BEGIN(); +#ifdef DEBUG + if (bottom[0]->prv_data() != NULL) + { + LOG(INFO) << "Debug: Bottom prv data: " << *bottom[0]->prv_data(); + } + else + { + LOG(INFO) << "Debug: Bottom prv data is NULL!"; + //LOG(INFO) << "Debug: Bottom cpu data: " << *bottom[0]->cpu_data(); + } + + if (top[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Top prv diff: " << *top[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Top prv diff is NULL!"; + //LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); + } +#endif poolingBwd.submit(); +#ifdef DEBUG + if (bottom[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Bottom prv diff: " << *bottom[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Bottom prv diff is NULL!"; + } +#endif PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); } diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 541684fbc..7b6cb2e06 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -140,6 +140,10 @@ void MKLDNNReLULayer::Forward_cpu(const vector*>& bottom ,const vector*>& top) { VLOG(1) << "MKLDNNReLULayer::Forward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::Forward_cpu: " << this->layer_param_.name(); +#endif + bool inplace = (bottom[0] == top[0]); if( reluFwd_pd == NULL) InitReLUFwd(bottom, top); @@ -184,10 +188,31 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top if (top_diff_is_prv) { shared_ptr > mem_descr = get_mkldnn_prv_descriptor(top[0]); +#ifdef DEBUG + memory::format bwd_prv_top_diff_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); + LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: memory format of prv top diff is: " << bwd_prv_top_diff_mfmt; +#endif top_diff_md.reset(new memory::desc(mem_descr->prv_memory_pd()->desc())); usr_diff_mpd = mem_descr->usr_memory_pd(); prv_diff_mpd = mem_descr->prv_memory_pd(); } else { + bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); + if (bottom_data_is_prv) { + shared_ptr > mem_descr + = get_mkldnn_prv_descriptor(bottom[0]); +#ifdef DEBUG + memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); + LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; + LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: Reorder the usr top diff to the format of prv bottom data! (Performance consideration)"; +#endif + prv_diff_mpd = mem_descr->prv_memory_pd(); + //top[0]->prv_data() is empty, however top[0]->get_prv_diff_descriptor() has value. + //Find root cause in the mkldnn_memory: create_output_memory() and sync_before_write() functions. + //But that a major fix, will lead the nan in the AlexNet training. + //So need investigation further, however, this will fix ICL-84. + top[0]->set_prv_diff_descriptor(NULL); + } + top_diff_md.reset(new memory::desc({{n, ic, ih, iw}}, mpcsn, memory::format::nchw)); usr_diff_mpd.reset(new memory::primitive_desc(*top_diff_md, cpu_engine)); } @@ -236,6 +261,10 @@ void MKLDNNReLULayer::Backward_cpu(const vector*>& top , const vector*>& bottom) { VLOG(1) << "MKLDNNReLULayer::Backward_cpu: " << this->layer_param_.name(); +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::Backward_cpu: " << this->layer_param_.name(); +#endif + bool inplace = (bottom[0] == top[0]); if (!propagate_down[0]) { return; @@ -249,7 +278,36 @@ void MKLDNNReLULayer::Backward_cpu(const vector*>& top PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW")); PERFORMANCE_MEASUREMENT_BEGIN(); +#ifdef DEBUG + if (bottom[0]->prv_data() != NULL) + { + LOG(INFO) << "Debug: Bottom prv data: " << *bottom[0]->prv_data(); + } + else + { + LOG(INFO) << "Debug: Bottom prv data is NULL!"; + } + + if (top[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Top prv diff: " << *top[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Top prv diff is NULL!"; + } +#endif reluBwd.submit(); +#ifdef DEBUG + if (bottom[0]->prv_diff() != NULL) + { + LOG(INFO) << "Debug: Bottom prv diff: " << *bottom[0]->prv_diff(); + } + else + { + LOG(INFO) << "Debug: Bottom prv diff is NULL!"; + } +#endif PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); } diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp index 70908d54c..0aeab4ee4 100644 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -132,24 +132,73 @@ void SoftmaxWithLossLayer::Forward_cpu( int dim = prob_.count() / outer_num_; int count = 0; Dtype loss = 0; - for (int i = 0; i < outer_num_; ++i) { - for (int j = 0; j < inner_num_; j++) { - const int label_value = static_cast(label[i * inner_num_ + j]); - if (has_ignore_label_ && label_value == ignore_label_) { - continue; + if (bottom.size() == 3) { + const Dtype* weights = bottom[2]->cpu_data(); + Dtype weighted_sum = 0; + Dtype weighted_sum_local = 0; + Dtype loss_local = 0; + + for (int i = 0; i < outer_num_; ++i) { + weighted_sum_local = 0; + loss_local = 0; + + #ifdef _OPENMP + #pragma omp parallel for reduction(+: loss_local, weighted_sum_local) if(inner_num_ > 1) + #endif + for (int j = 0; j < inner_num_; j++) { + const int label_value = static_cast(label[i * inner_num_ + j]); + if (has_ignore_label_ && label_value == ignore_label_) { + continue; + } + + DCHECK_GE(label_value, 0); + DCHECK_LT(label_value, prob_.shape(softmax_axis_)); + Dtype p = prob_data[i * dim + label_value * inner_num_ + j]; + loss_local += weights[i * inner_num_ + j] * log(std::max(Dtype(FLT_MIN), std::min(p, Dtype(1.0 - FLT_MIN)))); + weighted_sum_local += weights[i * inner_num_ + j]; + } + + weighted_sum += weighted_sum_local; + loss -= loss_local; + } + + top[0]->mutable_cpu_data()[0] = loss / weighted_sum; + if (top.size() == 2) { + top[1]->ShareData(prob_); + } + } else { + int count_local = 0; + Dtype loss_local = 0; + + for (int i = 0; i < outer_num_; ++i) { + count_local = 0; + loss_local = 0; + + #ifdef _OPENMP + #pragma omp parallel for reduction(+: loss_local, count_local) if(inner_num_ > 1) + #endif + for (int j = 0; j < inner_num_; j++) { + const int label_value = static_cast(label[i * inner_num_ + j]); + if (has_ignore_label_ && label_value == ignore_label_) { + continue; + } + + DCHECK_GE(label_value, 0); + DCHECK_LT(label_value, prob_.shape(softmax_axis_)); + Dtype p = prob_data[i * dim + label_value * inner_num_ + j]; + loss_local += log(std::max(Dtype(FLT_MIN), std::min(p, Dtype(1.0 - FLT_MIN)))); + ++count_local; + } + + count += count_local; + loss -= loss_local; + } + + Dtype normalizer = LossLayer::GetNormalizer(normalization_, outer_num_, inner_num_, count); + top[0]->mutable_cpu_data()[0] = loss / normalizer; + if (top.size() == 2) { + top[1]->ShareData(prob_); } - DCHECK_GE(label_value, 0); - DCHECK_LT(label_value, prob_.shape(softmax_axis_)); - loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j], - Dtype(FLT_MIN))); - ++count; - } - } - Dtype normalizer = LossLayer::GetNormalizer( - normalization_, outer_num_, inner_num_, count); - top[0]->mutable_cpu_data()[0] = loss / normalizer; - if (top.size() == 2) { - top[1]->ShareData(prob_); } } @@ -161,30 +210,58 @@ void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, << " Layer cannot backpropagate to label inputs."; } if (propagate_down[0]) { - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - const Dtype* prob_data = prob_.cpu_data(); - caffe_copy(prob_.count(), prob_data, bottom_diff); - const Dtype* label = bottom[1]->cpu_data(); - int dim = prob_.count() / outer_num_; - int count = 0; - for (int i = 0; i < outer_num_; ++i) { - for (int j = 0; j < inner_num_; ++j) { - const int label_value = static_cast(label[i * inner_num_ + j]); - if (has_ignore_label_ && label_value == ignore_label_) { - for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) { - bottom_diff[i * dim + c * inner_num_ + j] = 0; + if (bottom.size() == 3) { + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + const Dtype* prob_data = prob_.cpu_data(); + caffe_copy(prob_.count(), prob_data, bottom_diff); + const Dtype* label = bottom[1]->cpu_data(); + int dim = prob_.count() / outer_num_; + Dtype weight_sum = Dtype(0); + const Dtype* weights = bottom[2]->cpu_data(); + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; ++j) { + const int label_value = static_cast(label[i * inner_num_ + j]); + if (has_ignore_label_ && label_value == ignore_label_) { + for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) { + bottom_diff[i * dim + c * inner_num_ + j] = 0; + } + } else { + bottom_diff[i * dim + label_value * inner_num_ + j] -= 1; + for (int c = 0; c < bottom[0]->shape(1); ++c) { + bottom_diff[i * dim + c * inner_num_ + j] *= weights[i * inner_num_ + j]; + } + weight_sum += weights[i * inner_num_ + j]; + } } - } else { - bottom_diff[i * dim + label_value * inner_num_ + j] -= 1; - ++count; } - } + + Dtype loss_weight = top[0]->cpu_diff()[0] / weight_sum; + caffe_scal(prob_.count(), loss_weight, bottom_diff); + } else { + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + const Dtype* prob_data = prob_.cpu_data(); + caffe_copy(prob_.count(), prob_data, bottom_diff); + const Dtype* label = bottom[1]->cpu_data(); + int dim = prob_.count() / outer_num_; + int count = 0; + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; ++j) { + const int label_value = static_cast(label[i * inner_num_ + j]); + if (has_ignore_label_ && label_value == ignore_label_) { + for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) { + bottom_diff[i * dim + c * inner_num_ + j] = 0; + } + } else { + bottom_diff[i * dim + label_value * inner_num_ + j] -= 1; + ++count; + } + } + } + // Scale gradient + Dtype normalizer = LossLayer::GetNormalizer(normalization_, outer_num_, inner_num_, count); + Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer; + caffe_scal(prob_.count(), loss_weight, bottom_diff); } - // Scale gradient - Dtype normalizer = LossLayer::GetNormalizer( - normalization_, outer_num_, inner_num_, count); - Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer; - caffe_scal(prob_.count(), loss_weight, bottom_diff); } } diff --git a/src/caffe/mkldnn_memory.cpp b/src/caffe/mkldnn_memory.cpp index a14ef5084..ddad67f57 100644 --- a/src/caffe/mkldnn_memory.cpp +++ b/src/caffe/mkldnn_memory.cpp @@ -81,6 +81,7 @@ void MKLDNNMemoryDescriptorBase::create_reorder_descriptors() if ( *_usr_memory_pd != *_prv_memory_pd) { _reorder_usr2prv_pd = shared_ptr( new reorder::primitive_desc(*_usr_memory_pd, *_prv_memory_pd)); + _reorder_prv2usr_pd = shared_ptr( new reorder::primitive_desc(*_prv_memory_pd, *_usr_memory_pd)); } @@ -98,9 +99,17 @@ template : MKLDNNMemoryDescriptorBase(usr_memory_pd, prv_memory_pd, blob, mkldnn_layer) { const Dtype* prv_ptr = is_diff ? blob->prv_diff() : blob->prv_data(); + if (prv_ptr != NULL) { shared_ptr > blob_prv_mkldnn_mem_descr = get_mkldnn_prv_descriptor(blob); +#ifdef DEBUG + LOG(INFO) << "Format of blob-prv-memory-pd: " << blob_prv_mkldnn_mem_descr->prv_memory_pd()->desc().data.format; + LOG(INFO) << "Format of this-prv-memory-pd: " << this->prv_memory_pd()->desc().data.format; +#endif if (*blob_prv_mkldnn_mem_descr->prv_memory_pd() != *this->prv_memory_pd()) { +#ifdef DEBUG + LOG(INFO) << "Formats of blob-prv-memory-pd and this-prv-memory-pd are not equal !"; +#endif this->set_extprv_memory_pd(blob_prv_mkldnn_mem_descr->prv_memory_pd()); } } @@ -126,10 +135,18 @@ void MKLDNNMemoryDescriptor::create_reorder_to_prv(void* cpu_ptr template void MKLDNNMemoryDescriptor::convert_to_prv(void* cpu_ptr) { +#ifdef DEBUG + LOG(INFO) << "--- MKLDNNMemoryDescriptorBase::convert_to_prv --- " << this->name; +#endif CHECK(cpu_ptr); CHECK_EQ(this->_cpu_ptr, cpu_ptr); create_reorder_to_prv(cpu_ptr); VLOG(1) << "--- MKLDNNMemoryDescriptorBase::convert_to_prv --- " << this->name; +#ifdef DEBUG + LOG(INFO) << "Reorder: from usr to prv."; + LOG(INFO) << "Format of _usr_memory_pd: " << this->_usr_memory_pd->desc().data.format; + LOG(INFO) << "Format of _prv_memory_pd: " << this->_prv_memory_pd->desc().data.format; +#endif PERFORMANCE_MEASUREMENT_BEGIN(); this->_reorder_usr2prv.submit(); PERFORMANCE_MEASUREMENT_END_STATIC("mkldnn_conversion"); @@ -157,11 +174,19 @@ void MKLDNNMemoryDescriptor::create_reorder_from_prv(void* cpu_p template void MKLDNNMemoryDescriptor::convert_from_prv(void* cpu_ptr) { +#ifdef DEBUG + LOG(INFO) << "--- MKLDNNMemoryDescriptorBase::convert_from_prv --- " << this->name; +#endif CHECK(cpu_ptr); if(this->_reorder_prv2usr_pd == NULL) return; create_reorder_from_prv(cpu_ptr); VLOG(1) << "--- MKLDNNMemoryDescriptorBase::convert_from_prv --- " << this->name; +#ifdef DEBUG + LOG(INFO) << "Reorder: from prv to usr."; + LOG(INFO) << "Format of _prv_memory_pd: " << this->_prv_memory_pd->desc().data.format; + LOG(INFO) << "Format of _usr_memory_pd: " << this->_usr_memory_pd->desc().data.format; +#endif PERFORMANCE_MEASUREMENT_BEGIN(); this->_reorder_prv2usr.submit(); PERFORMANCE_MEASUREMENT_END_STATIC("mkldnn_conversion"); @@ -181,11 +206,26 @@ void MKLDNNMemoryDescriptor::create_reorder_from_extprv(shared_p template void MKLDNNMemoryDescriptor::convert_from_extprv(shared_ptr aprimitive) { +#ifdef DEBUG + LOG(INFO) << "--- MKLDNNMemoryDescriptorBase::convert_from_extprv --- " << this->name; +#endif CHECK(aprimitive); if(this->_reorder_extprv2prv_pd == NULL) return; + if (this->_extprv_memory_pd->desc().data.format == this->_prv_memory_pd->desc().data.format) + { +#ifdef DEBUG + LOG(INFO) << "The format of _extprv_memory_pd and _prv_memory_pd is same, no need do conversion."; +#endif + return; + } create_reorder_from_extprv(aprimitive); VLOG(1) << "--- MKLDNNMemoryDescriptorBase::convert_from_extprv --- " << this->name; +#ifdef DEBUG + LOG(INFO) << "Reorder: from extprv to prv."; + LOG(INFO) << "Format of _extprv_memory_pd: " << this->_extprv_memory_pd->desc().data.format; + LOG(INFO) << "Format of _prv_memory_pd: " << this->_prv_memory_pd->desc().data.format; +#endif PERFORMANCE_MEASUREMENT_BEGIN(); this->_reorder_extprv2prv.submit(); PERFORMANCE_MEASUREMENT_END_STATIC("mkldnn_conversion"); @@ -290,14 +330,22 @@ void MKLDNNMemoryDescriptor::sync_before_read() // if blob has not prv descriptor then set it to avoid conversions on next iterations if (is_diff) { this->_blob->set_prv_diff_descriptor(this->get_shared_ptr(), false); + // Original: // below line designated to set correspondent SyncedMemory->_head to HEAD_AT_CPU // TODO: need to optimize - this->_blob->set_prv_diff_descriptor(NULL); + //this->_blob->set_prv_diff_descriptor(NULL); + // It will lead the performance drop in two aspects: + // 1. FWD Conv: Reorder of weights from oihw to OIhw16i16o is executed for every iteration. This should be happening only once per convolution layer including all iterations. + // 2. BWD Conv: Reorder of weights is happening from oihw to OIhw16o16i format, where as expected, the reorder should happen from OIhw16i16o to OIhw16o16i for better performance. } else { - this->_blob->set_prv_data_descriptor(this->get_shared_ptr(), false); + this->_blob->set_prv_data_descriptor(this->get_shared_ptr(), true); //Change from false to true, suggested by Czaja, Jacek + // Original: // below line designated to set correspondent SyncedMemory->_head to HEAD_AT_CPU // TODO: need to optimize - this->_blob->set_prv_data_descriptor(NULL); + //this->_blob->set_prv_data_descriptor(NULL); + // It will lead the performance drop in two aspects: + // 1. FWD Conv: Reorder of weights from oihw to OIhw16i16o is executed for every iteration. This should be happening only once per convolution layer including all iterations. + // 2. BWD Conv: Reorder of weights is happening from oihw to OIhw16o16i format, where as expected, the reorder should happen from OIhw16i16o to OIhw16o16i for better performance. } } else { shared_ptr > blob_prv_mkldnn_mem_descr = get_mkldnn_prv_descriptor(this->_blob); @@ -326,6 +374,16 @@ void MKLDNNMemoryDescriptor::sync_before_write(bool inplace) this->_blob->set_prv_data_descriptor(this->get_shared_ptr(), this->conversion_needed() ? false : true); } } + //Fix me: this->conversion_needed() == false means diff/data is in the CPU, no need to set the prv_diff/data_descriptor + /* + if ((!inplace) && (this->conversion_needed())) { + if (is_diff) { + this->_blob->set_prv_diff_descriptor(this->get_shared_ptr(), false); + } else { + this->_blob->set_prv_data_descriptor(this->get_shared_ptr(), false); + } + } + */ } template @@ -372,7 +430,7 @@ shared_ptr MKLDNNMemoryDescriptor::create_input(bool template shared_ptr MKLDNNMemoryDescriptor::create_output_memory(bool inplace) { - // TODO: need to iptimize code + // TODO: need to optimize code shared_ptr omem = create_output_memory(this->_blob); if(!inplace) { if(is_diff) { @@ -381,6 +439,16 @@ shared_ptr MKLDNNMemoryDescriptor::create_output_memory( this->_blob->set_prv_data_descriptor(this->get_shared_ptr(), this->conversion_needed() ? false : true); } } + /* + //Fix me: this->conversion_needed() == false means diff/data is in the CPU, no need to set the prv_diff/data_descriptor + if ((!inplace) && (this->conversion_needed())) { + if (is_diff) { + this->_blob->set_prv_diff_descriptor(this->get_shared_ptr(), false); + } else { + this->_blob->set_prv_data_descriptor(this->get_shared_ptr(), false); + } + } + */ return omem; } diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 6455a2989..25e978856 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -92,8 +92,10 @@ inline void SyncedMemory::to_cpu() { case SYNCED_PRV: case HEAD_AT_CPU: if (prv_descriptor_.get()) { - if ( prv_descriptor_->on_to_cpu()) - head_ = SYNCED; + if (prv_descriptor_->on_to_cpu()) + //Fix: head_ = SYNCED means for caffe that CPU and GPU are in sync, + //as we do not have GPU setting, head_ to SYNCED will cause problems. + head_ = SYNCED_PRV; } break; case SYNCED: diff --git a/src/caffe/test/test_hdf5_output_layer.cpp b/src/caffe/test/test_hdf5_output_layer.cpp index 4ed88d1ed..2e49dd86a 100644 --- a/src/caffe/test/test_hdf5_output_layer.cpp +++ b/src/caffe/test/test_hdf5_output_layer.cpp @@ -153,6 +153,8 @@ TYPED_TEST(HDF5OutputLayerTest, TestForward) { status = H5Fclose(file_id); EXPECT_GE(status, 0) << "Failed to close HDF5 file " << this->output_file_name_; + + delete blob_label; } } // namespace caffe diff --git a/src/caffe/test/test_inner_product_layer.cpp b/src/caffe/test/test_inner_product_layer.cpp index 2e42c0382..278b1308b 100644 --- a/src/caffe/test/test_inner_product_layer.cpp +++ b/src/caffe/test/test_inner_product_layer.cpp @@ -243,6 +243,8 @@ TYPED_TEST(InnerProductLayerTest, TestForwardTranspose) { for (int i = 0; i < count; ++i) { EXPECT_FLOAT_EQ(data[i], data_t[i]); } + delete top; + delete top_t; } else { LOG(ERROR) << "Skipping test due to old architecture."; } @@ -425,6 +427,10 @@ TYPED_TEST(InnerProductLayerTest, TestBackwardTranspose) { EXPECT_NE(Dtype(0.), data[i]); EXPECT_FLOAT_EQ(data[i], data_t[i]); } + delete bottom_diff; + delete diff; + delete w; + delete top; } else { LOG(ERROR) << "Skipping test due to old architecture."; } diff --git a/src/caffe/test/test_mkldnn_convolution_layer.cpp b/src/caffe/test/test_mkldnn_convolution_layer.cpp index 14e4fd76e..551ab7bd2 100644 --- a/src/caffe/test/test_mkldnn_convolution_layer.cpp +++ b/src/caffe/test/test_mkldnn_convolution_layer.cpp @@ -1,1003 +1,1051 @@ -/* -All modification made by Intel Corporation: © 2016 Intel Corporation - -All contributions by the University of California: -Copyright (c) 2014, 2015, The Regents of the University of California (Regents) -All rights reserved. - -All other contributions: -Copyright (c) 2014, 2015, the respective contributors -All rights reserved. -For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md - - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef MKLDNN_SUPPORTED -#include - -#include "gtest/gtest.h" - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/filler.hpp" -#include "caffe/layers/mkldnn_layers.hpp" - -#include "caffe/test/test_caffe_main.hpp" -#include "caffe/test/test_gradient_check_util.hpp" - -namespace caffe { - -// Reference convolution for checking results: -// accumulate through explicit loops over input, output, and filters. -template -void caffe_conv(const Blob* in, ConvolutionParameter* conv_param, - const vector > >& weights, - Blob* out) { - const bool has_depth = (out->num_axes() == 5); - if (!has_depth) { CHECK_EQ(4, out->num_axes()); } - // Kernel size, stride, and pad - int kernel_h, kernel_w; - if (conv_param->has_kernel_h() || conv_param->has_kernel_w()) { - kernel_h = conv_param->kernel_h(); - kernel_w = conv_param->kernel_w(); - } else { - kernel_h = kernel_w = conv_param->kernel_size(0); - } - int pad_h, pad_w; - if (conv_param->has_pad_h() || conv_param->has_pad_w()) { - pad_h = conv_param->pad_h(); - pad_w = conv_param->pad_w(); - } else { - pad_h = pad_w = conv_param->pad_size() ? conv_param->pad(0) : 0; - } - int stride_h, stride_w; - if (conv_param->has_stride_h() || conv_param->has_stride_w()) { - stride_h = conv_param->stride_h(); - stride_w = conv_param->stride_w(); - } else { - stride_h = stride_w = conv_param->stride_size() ? conv_param->stride(0) : 1; - } - int dilation_h, dilation_w; - dilation_h = dilation_w = conv_param->dilation_size() ? - conv_param->dilation(0) : 1; - int kernel_d, pad_d, stride_d, dilation_d; - if (has_depth) { - kernel_d = kernel_h; - stride_d = stride_h; - pad_d = pad_h; - dilation_d = dilation_h; - } else { - kernel_d = stride_d = dilation_d = 1; - pad_d = 0; - } - // Groups - int groups = conv_param->group(); - int o_g = out->shape(1) / groups; - int k_g = in->shape(1) / groups; - int o_head, k_head; - // Convolution - vector weight_offset(4 + has_depth); - vector in_offset(4 + has_depth); - vector out_offset(4 + has_depth); - Dtype* out_data = out->mutable_cpu_data(); - for (int n = 0; n < out->shape(0); n++) { - for (int g = 0; g < groups; g++) { - o_head = o_g * g; - k_head = k_g * g; - for (int o = 0; o < o_g; o++) { - for (int k = 0; k < k_g; k++) { - for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { - for (int y = 0; y < out->shape(2 + has_depth); y++) { - for (int x = 0; x < out->shape(3 + has_depth); x++) { - for (int r = 0; r < kernel_d; r++) { - for (int p = 0; p < kernel_h; p++) { - for (int q = 0; q < kernel_w; q++) { - int in_z = z * stride_d - pad_d + r * dilation_d; - int in_y = y * stride_h - pad_h + p * dilation_h; - int in_x = x * stride_w - pad_w + q * dilation_w; - if (in_z >= 0 && in_z < (has_depth ? in->shape(2) : 1) - && in_y >= 0 && in_y < in->shape(2 + has_depth) - && in_x >= 0 && in_x < in->shape(3 + has_depth)) { - weight_offset[0] = o + o_head; - weight_offset[1] = k; - if (has_depth) { weight_offset[2] = r; } - weight_offset[2 + has_depth] = p; - weight_offset[3 + has_depth] = q; - in_offset[0] = n; - in_offset[1] = k + k_head; - if (has_depth) { in_offset[2] = in_z; } - in_offset[2 + has_depth] = in_y; - in_offset[3 + has_depth] = in_x; - out_offset[0] = n; - out_offset[1] = o + o_head; - if (has_depth) { out_offset[2] = z; } - out_offset[2 + has_depth] = y; - out_offset[3 + has_depth] = x; - out_data[out->offset(out_offset)] += - in->data_at(in_offset) - * weights[0]->data_at(weight_offset); - } - } - } - } - } - } - } - } - } - } - } - // Bias - if (conv_param->bias_term()) { - const Dtype* bias_data = weights[1]->cpu_data(); - for (int n = 0; n < out->shape(0); n++) { - for (int o = 0; o < out->shape(1); o++) { - for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { - for (int y = 0; y < out->shape(2 + has_depth); y++) { - for (int x = 0; x < out->shape(3 + has_depth); x++) { - out_offset[0] = n; - out_offset[1] = o; - if (has_depth) { out_offset[2] = z; } - out_offset[2 + has_depth] = y; - out_offset[3 + has_depth] = x; - out_data[out->offset(out_offset)] += bias_data[o]; - } - } - } - } - } - } - //relu - if (conv_param->relu()){ - for (int n = 0; n < out->shape(0); n++) { - for (int o = 0; o < out->shape(1); o++) { - for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { - for (int y = 0; y < out->shape(2 + has_depth); y++) { - for (int x = 0; x < out->shape(3 + has_depth); x++) { - out_offset[0] = n; - out_offset[1] = o; - if (has_depth) { out_offset[2] = z; } - out_offset[2 + has_depth] = y; - out_offset[3 + has_depth] = x; - if(out_data[out->offset(out_offset)] < 0) out_data[out->offset(out_offset)] = 0; - } - } - } - } - } - } -} - -template void caffe_conv(const Blob* in, - ConvolutionParameter* conv_param, - const vector > >& weights, - Blob* out); -template void caffe_conv(const Blob* in, - ConvolutionParameter* conv_param, - const vector > >& weights, - Blob* out); - -template -class MKLDNNConvolutionLayerTest : public MultiDeviceTest { - typedef typename TypeParam::Dtype Dtype; - - -#define MB 2 -#define IC 8 -#define OC 8 -#define IH 5 -#define IW 5 -#define OH 5 -#define OW 5 -#define KH 3 -#define KW 3 -#define CS 1 -#define GR 2 -#define PD 1 - - protected: - MKLDNNConvolutionLayerTest() - : blob_bottom_(new Blob(MB, IC, IH, IW)), - blob_bottom_2_(new Blob(MB, IC, IH, IW)), - blob_top_(new Blob()), - blob_top_2_(new Blob()) {} - virtual void SetUp() { - // fill the values - FillerParameter filler_param; - filler_param.set_value(1.); - GaussianFiller filler(filler_param); - filler.Fill(this->blob_bottom_); - filler.Fill(this->blob_bottom_2_); - blob_bottom_vec_.push_back(blob_bottom_); - blob_top_vec_.push_back(blob_top_); - } - - virtual ~MKLDNNConvolutionLayerTest() { - delete blob_bottom_; - delete blob_bottom_2_; - delete blob_top_; - delete blob_top_2_; - } - - virtual Blob* MakeReferenceTop(Blob* top) { - this->ref_blob_top_.reset(new Blob()); - this->ref_blob_top_->ReshapeLike(*top); - return this->ref_blob_top_.get(); - } - - Blob* const blob_bottom_; - Blob* const blob_bottom_2_; - Blob* const blob_top_; - Blob* const blob_top_2_; - shared_ptr > ref_blob_top_; - vector*> blob_bottom_vec_; - vector*> blob_top_vec_; -}; - -typedef ::testing::Types -// ,CPUDevice - > TestDtypesCPU; - -TYPED_TEST_CASE(MKLDNNConvolutionLayerTest, TestDtypesCPU); - -TYPED_TEST(MKLDNNConvolutionLayerTest, TestSetupMKLDNN) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(KH); - convolution_param->add_stride(CS); - convolution_param->set_num_output(OC); - convolution_param->add_pad(PD); - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - EXPECT_EQ(this->blob_top_->num(), MB); - EXPECT_EQ(this->blob_top_->channels(), OC); - EXPECT_EQ(this->blob_top_->height(), OH); - EXPECT_EQ(this->blob_top_->width(), OW); - EXPECT_EQ(this->blob_top_2_->num(), MB); - EXPECT_EQ(this->blob_top_2_->channels(), OC ); - EXPECT_EQ(this->blob_top_2_->height(), OH); - EXPECT_EQ(this->blob_top_2_->width(), OW); - // setting group should not change the shape - convolution_param->set_num_output(OC); - convolution_param->set_group(GR); - layer.reset(new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - EXPECT_EQ(this->blob_top_->num(), MB); - EXPECT_EQ(this->blob_top_->channels(), OC); - EXPECT_EQ(this->blob_top_->height(), OH); - EXPECT_EQ(this->blob_top_->width(), OW); - EXPECT_EQ(this->blob_top_2_->num(), MB); - EXPECT_EQ(this->blob_top_2_->channels(), OC); - EXPECT_EQ(this->blob_top_2_->height(), OH); - EXPECT_EQ(this->blob_top_2_->width(), OW); -} - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_TestSimpleConvolutionMKLDNN) { - typedef typename TypeParam::Dtype Dtype; - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(KH); - convolution_param->add_stride(CS); - convolution_param->set_num_output(OC); - convolution_param->add_pad(PD); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("constant"); - convolution_param->mutable_bias_filler()->set_value(0.1); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } - -#if 0 // TODO: improve conv so that it runs on all buffers in bottom vector - caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_2_)); - top_data = this->blob_top_2_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -#endif -} - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_TestSimpleConvolutionReLUMKLDNN) { - typedef typename TypeParam::Dtype Dtype; - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_num_output(OC); - convolution_param->set_relu(true); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("constant"); - convolution_param->mutable_bias_filler()->set_value(0.1); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -} - -#if 0 -TYPED_TEST(MKLDNNConvolutionLayerTest, TestDilatedConvolutionMKLDNN) { - typedef typename TypeParam::Dtype Dtype; - vector bottom_shape; - bottom_shape.push_back(2); - bottom_shape.push_back(3); - bottom_shape.push_back(8); - bottom_shape.push_back(7); - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - } - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_dilation(2); - convolution_param->set_num_output(4); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("constant"); - convolution_param->mutable_bias_filler()->set_value(0.1); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -#if 0 // TODO: improve conv so that it runs on all buffers in bottom vector - caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_2_)); - top_data = this->blob_top_2_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -#endif -} -#endif - -#if 0 -TYPED_TEST(MKLDNNConvolutionLayerTest, Test0DConvolutionMKLDNN) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - const int kNumOutput = 3; - convolution_param->set_num_output(kNumOutput); - convolution_param->set_axis(3); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - vector top_shape = this->blob_bottom_->shape(); - top_shape[3] = kNumOutput; - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - EXPECT_EQ(top_shape, this->blob_top_->shape()); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - vector weight_offset(2); - const Blob* weight = layer->blobs()[0].get(); - const Blob* bias = layer->blobs()[1].get(); - const int num = this->blob_top_->count(3); - const int dim = this->blob_top_->shape(3); - const int bottom_dim = this->blob_bottom_->shape(3); - for (int n = 0; n < num; ++n) { - for (int d = 0; d < dim; ++d) { - weight_offset[0] = d; - Dtype value = bias->cpu_data()[d]; - for (int bottom_d = 0; bottom_d < bottom_dim; ++bottom_d) { - weight_offset[1] = bottom_d; - value += weight->data_at(weight_offset) * - this->blob_bottom_->cpu_data()[n * bottom_dim + bottom_d]; - } - EXPECT_NEAR(value, this->blob_top_->cpu_data()[n * dim + d], 1e-4); - } - } -} -#endif - -#if 0 -TYPED_TEST(MKLDNNConvolutionLayerTest, TestSimple3DConvolution) { - typedef typename TypeParam::Dtype Dtype; - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - vector bottom_shape(5); - bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); - bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); - bottom_shape[2] = 5; - bottom_shape[3] = this->blob_bottom_vec_[0]->shape(2); - bottom_shape[4] = this->blob_bottom_vec_[0]->shape(3); - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_num_output(4); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } - -#if 0 // TODO: improve conv so that it runs on all buffers in bottom vector - caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_2_)); - top_data = this->blob_top_2_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -#endif -} -#endif - -#if 0 -TYPED_TEST(MKLDNNConvolutionLayerTest, TestDilated3DConvolution) { - typedef typename TypeParam::Dtype Dtype; - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - vector bottom_shape(5); - bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); - bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); - bottom_shape[2] = 6; - bottom_shape[3] = 7; - bottom_shape[4] = 8; - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_dilation(2); - convolution_param->set_num_output(4); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } - caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_2_)); - top_data = this->blob_top_2_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -} -#endif - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_Test1x1Convolution) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(1); - convolution_param->add_stride(1); - convolution_param->set_num_output(OC); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("constant"); - convolution_param->mutable_bias_filler()->set_value(0.1); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -} - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_Test1x1ConvolutionReLU) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(1); - convolution_param->add_stride(1); - convolution_param->set_num_output(OC); - convolution_param->set_relu(true); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("constant"); - convolution_param->mutable_bias_filler()->set_value(0.1); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -} - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_TestSimpleConvolutionGroup) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(KH); - convolution_param->add_stride(CS); - convolution_param->set_num_output(OC); - convolution_param->set_group(GR); - convolution_param->add_pad(PD); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("constant"); - convolution_param->mutable_bias_filler()->set_value(0.1); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -} - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_TestSimpleConvolutionReLUGroup) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_num_output(OC); - convolution_param->set_relu(true); - convolution_param->set_group(GR); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("constant"); - convolution_param->mutable_bias_filler()->set_value(0.1); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -} - -#if 0 -TYPED_TEST(MKLDNNConvolutionLayerTest, TestSobelConvolution) { - // Test separable convolution by computing the Sobel operator - // as a single filter then comparing the result - // as the convolution of two rectangular filters. - typedef typename TypeParam::Dtype Dtype; - // Fill bottoms with identical Gaussian noise. - shared_ptr > filler; - FillerParameter filler_param; - filler_param.set_value(1.); - filler.reset(new GaussianFiller(filler_param)); - filler->Fill(this->blob_bottom_); - this->blob_bottom_2_->CopyFrom(*this->blob_bottom_); - // Compute Sobel G_x operator as 3 x 3 convolution. - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_num_output(1); - convolution_param->set_bias_term(false); - shared_ptr > layer( - new MKLDNNConvolutionLayer(layer_param)); - layer->blobs().resize(1); - layer->blobs()[0].reset(new Blob(1, 3, 3, 3)); - Dtype* weights = layer->blobs()[0]->mutable_cpu_data(); - for (int c = 0; c < 3; ++c) { - int i = c * 9; // 3 x 3 filter - weights[i + 0] = -1; - weights[i + 1] = 0; - weights[i + 2] = 1; - weights[i + 3] = -2; - weights[i + 4] = 0; - weights[i + 5] = 2; - weights[i + 6] = -1; - weights[i + 7] = 0; - weights[i + 8] = 1; - } - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Compute Sobel G_x operator as separable 3 x 1 and 1 x 3 convolutions. - // (1) the [1 2 1] column filter - vector*> sep_blob_bottom_vec; - vector*> sep_blob_top_vec; - shared_ptr > blob_sep(new Blob()); - sep_blob_bottom_vec.push_back(this->blob_bottom_2_); - sep_blob_top_vec.push_back(this->blob_top_2_); - convolution_param->clear_kernel_size(); - convolution_param->clear_stride(); - convolution_param->set_kernel_h(3); - convolution_param->set_kernel_w(1); - convolution_param->set_stride_h(2); - convolution_param->set_stride_w(1); - convolution_param->set_num_output(1); - convolution_param->set_bias_term(false); - layer.reset(new MKLDNNConvolutionLayer(layer_param)); - layer->blobs().resize(1); - layer->blobs()[0].reset(new Blob(1, 3, 3, 1)); - Dtype* weights_1 = layer->blobs()[0]->mutable_cpu_data(); - for (int c = 0; c < 3; ++c) { - int i = c * 3; // 3 x 1 filter - weights_1[i + 0] = 1; - weights_1[i + 1] = 2; - weights_1[i + 2] = 1; - } - layer->SetUp(sep_blob_bottom_vec, sep_blob_top_vec); - layer->Forward(sep_blob_bottom_vec, sep_blob_top_vec); - // (2) the [-1 0 1] row filter - blob_sep->CopyFrom(*this->blob_top_2_, false, true); - sep_blob_bottom_vec.clear(); - sep_blob_bottom_vec.push_back(blob_sep.get()); - convolution_param->set_kernel_h(1); - convolution_param->set_kernel_w(3); - convolution_param->set_stride_h(1); - convolution_param->set_stride_w(2); - convolution_param->set_num_output(1); - convolution_param->set_bias_term(false); - layer.reset(new MKLDNNConvolutionLayer(layer_param)); - layer->blobs().resize(1); - layer->blobs()[0].reset(new Blob(1, 1, 1, 3)); - Dtype* weights_2 = layer->blobs()[0]->mutable_cpu_data(); - weights_2[0] = -1; - weights_2[1] = 0; - weights_2[2] = 1; - layer->SetUp(sep_blob_bottom_vec, sep_blob_top_vec); - layer->Forward(sep_blob_bottom_vec, sep_blob_top_vec); - // Test equivalence of full and separable filters. - const Dtype* top_data = this->blob_top_->cpu_data(); - const Dtype* sep_top_data = this->blob_top_2_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], sep_top_data[i], 1e-4); - } -} -#endif - -#if 0 -TYPED_TEST(MKLDNNConvolutionLayerTest, TestNDAgainst2D) { - typedef typename TypeParam::Dtype Dtype; - const int kernel_h = 11; - const int kernel_w = 13; - vector bottom_shape(4); - bottom_shape[0] = 15; - bottom_shape[1] = 18; - bottom_shape[2] = kernel_h * 2; - bottom_shape[3] = kernel_w * 2; - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->set_num_output(12); - convolution_param->set_bias_term(false); - convolution_param->set_group(6); - convolution_param->set_kernel_h(kernel_h); - convolution_param->set_kernel_w(kernel_w); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - Blob weights; - Blob top_diff; - // Shape and fill weights and top_diff. - bool copy_diff; - bool reshape; - { - MKLDNNConvolutionLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - top_diff.ReshapeLike(*this->blob_top_); - filler.Fill(&top_diff); - ASSERT_EQ(1, layer.blobs().size()); - copy_diff = false; reshape = true; - weights.CopyFrom(*layer.blobs()[0], copy_diff, reshape); - } - vector propagate_down(1, true); - Blob result_2d; - Blob backward_result_2d; - Blob backward_weight_result_2d; - // Test with 2D im2col - { - caffe_set(this->blob_top_->count(), Dtype(0), - this->blob_top_->mutable_cpu_data()); - caffe_set(this->blob_bottom_->count(), Dtype(0), - this->blob_bottom_->mutable_cpu_diff()); - caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); - // Do SetUp and Forward; save Forward result in result_2d. - convolution_param->set_force_nd_im2col(false); - MKLDNNConvolutionLayer layer_2d(layer_param); - layer_2d.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - ASSERT_EQ(1, layer_2d.blobs().size()); - copy_diff = false; reshape = false; - layer_2d.blobs()[0]->CopyFrom(weights, copy_diff, reshape); - layer_2d.Forward(this->blob_bottom_vec_, this->blob_top_vec_); - copy_diff = false; reshape = true; - result_2d.CopyFrom(*this->blob_top_, copy_diff, reshape); - // Copy pre-generated top diff into actual top diff; - // do Backward and save result in backward_result_2d. - ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); - caffe_copy(top_diff.count(), top_diff.cpu_data(), - this->blob_top_->mutable_cpu_diff()); - layer_2d.Backward(this->blob_top_vec_, propagate_down, - this->blob_bottom_vec_); - copy_diff = true; reshape = true; - backward_result_2d.CopyFrom(*this->blob_bottom_, copy_diff, reshape); - backward_weight_result_2d.CopyFrom(weights, copy_diff, reshape); - } - Blob result_nd; - Blob backward_result_nd; - Blob backward_weight_result_nd; - // Test with ND im2col - { - caffe_set(this->blob_top_->count(), Dtype(0), - this->blob_top_->mutable_cpu_data()); - caffe_set(this->blob_bottom_->count(), Dtype(0), - this->blob_bottom_->mutable_cpu_diff()); - caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); - // Do SetUp and Forward; save Forward result in result_nd. - convolution_param->set_force_nd_im2col(true); - MKLDNNConvolutionLayer layer_nd(layer_param); - layer_nd.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - ASSERT_EQ(1, layer_nd.blobs().size()); - copy_diff = false; reshape = false; - layer_nd.blobs()[0]->CopyFrom(weights, copy_diff, reshape); - layer_nd.Forward(this->blob_bottom_vec_, this->blob_top_vec_); - copy_diff = false; reshape = true; - result_nd.CopyFrom(*this->blob_top_, copy_diff, reshape); - // Copy pre-generated top diff into actual top diff; - // do Backward and save result in backward_result_nd. - ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); - caffe_copy(top_diff.count(), top_diff.cpu_data(), - this->blob_top_->mutable_cpu_diff()); - layer_nd.Backward(this->blob_top_vec_, propagate_down, - this->blob_bottom_vec_); - copy_diff = true; reshape = true; - backward_result_nd.CopyFrom(*this->blob_bottom_, copy_diff, reshape); - backward_weight_result_nd.CopyFrom(weights, copy_diff, reshape); - } - ASSERT_EQ(result_nd.count(), result_2d.count()); - for (int i = 0; i < result_2d.count(); ++i) { - EXPECT_EQ(result_2d.cpu_data()[i], result_nd.cpu_data()[i]); - } - ASSERT_EQ(backward_result_nd.count(), backward_result_2d.count()); - for (int i = 0; i < backward_result_2d.count(); ++i) { - EXPECT_EQ(backward_result_2d.cpu_diff()[i], - backward_result_nd.cpu_diff()[i]); - } - ASSERT_EQ(backward_weight_result_nd.count(), - backward_weight_result_2d.count()); - for (int i = 0; i < backward_weight_result_2d.count(); ++i) { - EXPECT_EQ(backward_weight_result_2d.cpu_diff()[i], - backward_weight_result_nd.cpu_diff()[i]); - } -} -#endif - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_TestGradient) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - -// TODO: improve conv so that it runs on all buffers in bottom vector - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->add_kernel_size(KH); - convolution_param->add_stride(CS); - convolution_param->set_num_output(OC); - convolution_param->add_pad(PD); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - MKLDNNConvolutionLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - -#if 0 -TYPED_TEST(MKLDNNConvolutionLayerTest, TestDilatedGradient) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - vector bottom_shape; - bottom_shape.push_back(2); - bottom_shape.push_back(3); - bottom_shape.push_back(5); - bottom_shape.push_back(6); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - } - convolution_param->add_kernel_size(3); - convolution_param->add_dilation(2); - convolution_param->set_num_output(2); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - MKLDNNConvolutionLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} -#endif - -#if 0 -TYPED_TEST(MKLDNNConvolutionLayerTest, TestGradient3D) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - vector bottom_shape(5); - bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); - bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); - bottom_shape[2] = 5; - bottom_shape[3] = this->blob_bottom_vec_[0]->shape(2); - bottom_shape[4] = this->blob_bottom_vec_[0]->shape(3); - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_num_output(2); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - MKLDNNConvolutionLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} -#endif - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_Test1x1Gradient) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->add_kernel_size(1); - convolution_param->add_stride(1); - convolution_param->set_num_output(2); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - MKLDNNConvolutionLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - -TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_TestGradientGroup) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_num_output(2); - convolution_param->set_group(GR); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - MKLDNNConvolutionLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - -} // namespace caffe -#endif // #ifdef MKLDNN_SUPPORTED +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef MKLDNN_SUPPORTED +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/mkldnn_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +// Reference convolution for checking results: +// accumulate through explicit loops over input, output, and filters. +template +void caffe_conv(const Blob* in, ConvolutionParameter* conv_param, + const vector > >& weights, + Blob* out) { + const bool has_depth = (out->num_axes() == 5); + if (!has_depth) { CHECK_EQ(4, out->num_axes()); } + // Kernel size, stride, and pad + int kernel_h, kernel_w; + if (conv_param->has_kernel_h() || conv_param->has_kernel_w()) { + kernel_h = conv_param->kernel_h(); + kernel_w = conv_param->kernel_w(); + } else { + kernel_h = kernel_w = conv_param->kernel_size(0); + } + int pad_h, pad_w; + if (conv_param->has_pad_h() || conv_param->has_pad_w()) { + pad_h = conv_param->pad_h(); + pad_w = conv_param->pad_w(); + } else { + pad_h = pad_w = conv_param->pad_size() ? conv_param->pad(0) : 0; + } + int stride_h, stride_w; + if (conv_param->has_stride_h() || conv_param->has_stride_w()) { + stride_h = conv_param->stride_h(); + stride_w = conv_param->stride_w(); + } else { + stride_h = stride_w = conv_param->stride_size() ? conv_param->stride(0) : 1; + } + int dilation_h, dilation_w; + dilation_h = dilation_w = conv_param->dilation_size() ? + conv_param->dilation(0) : 1; + int kernel_d, pad_d, stride_d, dilation_d; + if (has_depth) { + kernel_d = kernel_h; + stride_d = stride_h; + pad_d = pad_h; + dilation_d = dilation_h; + } else { + kernel_d = stride_d = dilation_d = 1; + pad_d = 0; + } + // Groups + int groups = conv_param->group(); + int o_g = out->shape(1) / groups; + int k_g = in->shape(1) / groups; + int o_head, k_head; + // Convolution + vector weight_offset(4 + has_depth); + vector in_offset(4 + has_depth); + vector out_offset(4 + has_depth); + Dtype* out_data = out->mutable_cpu_data(); + for (int n = 0; n < out->shape(0); n++) { + for (int g = 0; g < groups; g++) { + o_head = o_g * g; + k_head = k_g * g; + for (int o = 0; o < o_g; o++) { + for (int k = 0; k < k_g; k++) { + for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { + for (int y = 0; y < out->shape(2 + has_depth); y++) { + for (int x = 0; x < out->shape(3 + has_depth); x++) { + for (int r = 0; r < kernel_d; r++) { + for (int p = 0; p < kernel_h; p++) { + for (int q = 0; q < kernel_w; q++) { + int in_z = z * stride_d - pad_d + r * dilation_d; + int in_y = y * stride_h - pad_h + p * dilation_h; + int in_x = x * stride_w - pad_w + q * dilation_w; + if (in_z >= 0 && in_z < (has_depth ? in->shape(2) : 1) + && in_y >= 0 && in_y < in->shape(2 + has_depth) + && in_x >= 0 && in_x < in->shape(3 + has_depth)) { + weight_offset[0] = o + o_head; + weight_offset[1] = k; + if (has_depth) { weight_offset[2] = r; } + weight_offset[2 + has_depth] = p; + weight_offset[3 + has_depth] = q; + in_offset[0] = n; + in_offset[1] = k + k_head; + if (has_depth) { in_offset[2] = in_z; } + in_offset[2 + has_depth] = in_y; + in_offset[3 + has_depth] = in_x; + out_offset[0] = n; + out_offset[1] = o + o_head; + if (has_depth) { out_offset[2] = z; } + out_offset[2 + has_depth] = y; + out_offset[3 + has_depth] = x; + out_data[out->offset(out_offset)] += + in->data_at(in_offset) + * weights[0]->data_at(weight_offset); + } + } + } + } + } + } + } + } + } + } + } + // Bias + if (conv_param->bias_term()) { + const Dtype* bias_data = weights[1]->cpu_data(); + for (int n = 0; n < out->shape(0); n++) { + for (int o = 0; o < out->shape(1); o++) { + for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { + for (int y = 0; y < out->shape(2 + has_depth); y++) { + for (int x = 0; x < out->shape(3 + has_depth); x++) { + out_offset[0] = n; + out_offset[1] = o; + if (has_depth) { out_offset[2] = z; } + out_offset[2 + has_depth] = y; + out_offset[3 + has_depth] = x; + out_data[out->offset(out_offset)] += bias_data[o]; + } + } + } + } + } + } + //relu + if (conv_param->relu()){ + for (int n = 0; n < out->shape(0); n++) { + for (int o = 0; o < out->shape(1); o++) { + for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { + for (int y = 0; y < out->shape(2 + has_depth); y++) { + for (int x = 0; x < out->shape(3 + has_depth); x++) { + out_offset[0] = n; + out_offset[1] = o; + if (has_depth) { out_offset[2] = z; } + out_offset[2 + has_depth] = y; + out_offset[3 + has_depth] = x; + if(out_data[out->offset(out_offset)] < 0) out_data[out->offset(out_offset)] = 0; + } + } + } + } + } + } +} + +template void caffe_conv(const Blob* in, + ConvolutionParameter* conv_param, + const vector > >& weights, + Blob* out); +template void caffe_conv(const Blob* in, + ConvolutionParameter* conv_param, + const vector > >& weights, + Blob* out); + +template +class MKLDNNConvolutionLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + +#define MB 2 +#define IC 8 +#define OC 8 +#define IH 5 +#define IW 5 +#define OH 5 +#define OW 5 +#define KH 3 +#define KW 3 +#define CS 1 +#define GR 2 +#define PD 1 + + protected: + MKLDNNConvolutionLayerTest() + : blob_bottom_(new Blob(MB, IC, IH, IW)), + blob_bottom_2_(new Blob(MB, IC, IH, IW)), + blob_top_(new Blob()), + blob_top_2_(new Blob()) {} + virtual void SetUp() { + // fill the values + FillerParameter filler_param; + filler_param.set_value(1.); + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + filler.Fill(this->blob_bottom_2_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + + virtual ~MKLDNNConvolutionLayerTest() { + delete blob_bottom_; + delete blob_bottom_2_; + delete blob_top_; + delete blob_top_2_; + } + + virtual Blob* MakeReferenceTop(Blob* top) { + this->ref_blob_top_.reset(new Blob()); + this->ref_blob_top_->ReshapeLike(*top); + return this->ref_blob_top_.get(); + } + + Blob* const blob_bottom_; + Blob* const blob_bottom_2_; + Blob* const blob_top_; + Blob* const blob_top_2_; + shared_ptr > ref_blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +typedef ::testing::Types +// ,CPUDevice + > TestDtypesCPU; + +TYPED_TEST_CASE(MKLDNNConvolutionLayerTest, TestDtypesCPU); + +TYPED_TEST(MKLDNNConvolutionLayerTest, TestSetupMKLDNN) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(KH); + convolution_param->add_stride(CS); + convolution_param->set_num_output(OC); + convolution_param->add_pad(PD); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), MB); + EXPECT_EQ(this->blob_top_->channels(), OC); + EXPECT_EQ(this->blob_top_->height(), OH); + EXPECT_EQ(this->blob_top_->width(), OW); + EXPECT_EQ(this->blob_top_2_->num(), MB); + EXPECT_EQ(this->blob_top_2_->channels(), OC ); + EXPECT_EQ(this->blob_top_2_->height(), OH); + EXPECT_EQ(this->blob_top_2_->width(), OW); + // setting group should not change the shape + convolution_param->set_num_output(OC); + convolution_param->set_group(GR); + layer.reset(new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), MB); + EXPECT_EQ(this->blob_top_->channels(), OC); + EXPECT_EQ(this->blob_top_->height(), OH); + EXPECT_EQ(this->blob_top_->width(), OW); + EXPECT_EQ(this->blob_top_2_->num(), MB); + EXPECT_EQ(this->blob_top_2_->channels(), OC); + EXPECT_EQ(this->blob_top_2_->height(), OH); + EXPECT_EQ(this->blob_top_2_->width(), OW); +} + +TYPED_TEST(MKLDNNConvolutionLayerTest, TestSetupMKLDNNWithRectangeKernelStridePad) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_h(4); + convolution_param->set_kernel_w(1); + convolution_param->set_stride_h(3); + convolution_param->set_stride_w(1); + convolution_param->set_num_output(OC); + convolution_param->set_pad_h(2); + convolution_param->set_pad_w(1); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(convolution_param->kernel_h(), 4); + EXPECT_EQ(layer->GetKernelHeight(), 4); + EXPECT_EQ(convolution_param->kernel_w(), 1); + EXPECT_EQ(layer->GetKernelWidth(), 1); + EXPECT_EQ(convolution_param->stride_h(), 3); + EXPECT_EQ(layer->GetStrideHeight(), 3); + EXPECT_EQ(convolution_param->stride_w(), 1); + EXPECT_EQ(layer->GetStrideWidth(), 1); + EXPECT_EQ(convolution_param->pad_h(), 2); + EXPECT_EQ(layer->GetPadHeight(), 2); + EXPECT_EQ(convolution_param->pad_w(), 1); + EXPECT_EQ(layer->GetPadWidth(), 1); + // setting group should not change the shape + convolution_param->set_num_output(OC); + convolution_param->set_group(GR); + layer.reset(new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(convolution_param->kernel_h(), 4); + EXPECT_EQ(layer->GetKernelHeight(), 4); + EXPECT_EQ(convolution_param->kernel_w(), 1); + EXPECT_EQ(layer->GetKernelWidth(), 1); + EXPECT_EQ(convolution_param->stride_h(), 3); + EXPECT_EQ(layer->GetStrideHeight(), 3); + EXPECT_EQ(convolution_param->stride_w(), 1); + EXPECT_EQ(layer->GetStrideWidth(), 1); + EXPECT_EQ(convolution_param->pad_h(), 2); + EXPECT_EQ(layer->GetPadHeight(), 2); + EXPECT_EQ(convolution_param->pad_w(), 1); + EXPECT_EQ(layer->GetPadWidth(), 1); +} + +TYPED_TEST(MKLDNNConvolutionLayerTest, TestSimpleConvolutionMKLDNN) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(KH); + convolution_param->add_stride(CS); + convolution_param->set_num_output(OC); + convolution_param->add_pad(PD); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } + +#if 0 // TODO: improve conv so that it runs on all buffers in bottom vector + caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_2_)); + top_data = this->blob_top_2_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +#endif +} + +TYPED_TEST(MKLDNNConvolutionLayerTest, TestSimpleConvolutionReLUMKLDNN) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(OC); + convolution_param->set_relu(true); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} + +#if 0 +TYPED_TEST(MKLDNNConvolutionLayerTest, TestDilatedConvolutionMKLDNN) { + typedef typename TypeParam::Dtype Dtype; + vector bottom_shape; + bottom_shape.push_back(2); + bottom_shape.push_back(3); + bottom_shape.push_back(8); + bottom_shape.push_back(7); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_dilation(2); + convolution_param->set_num_output(4); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +#if 0 // TODO: improve conv so that it runs on all buffers in bottom vector + caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_2_)); + top_data = this->blob_top_2_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +#endif +} +#endif + +#if 0 +TYPED_TEST(MKLDNNConvolutionLayerTest, Test0DConvolutionMKLDNN) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + const int kNumOutput = 3; + convolution_param->set_num_output(kNumOutput); + convolution_param->set_axis(3); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + vector top_shape = this->blob_bottom_->shape(); + top_shape[3] = kNumOutput; + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(top_shape, this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + vector weight_offset(2); + const Blob* weight = layer->blobs()[0].get(); + const Blob* bias = layer->blobs()[1].get(); + const int num = this->blob_top_->count(3); + const int dim = this->blob_top_->shape(3); + const int bottom_dim = this->blob_bottom_->shape(3); + for (int n = 0; n < num; ++n) { + for (int d = 0; d < dim; ++d) { + weight_offset[0] = d; + Dtype value = bias->cpu_data()[d]; + for (int bottom_d = 0; bottom_d < bottom_dim; ++bottom_d) { + weight_offset[1] = bottom_d; + value += weight->data_at(weight_offset) * + this->blob_bottom_->cpu_data()[n * bottom_dim + bottom_d]; + } + EXPECT_NEAR(value, this->blob_top_->cpu_data()[n * dim + d], 1e-4); + } + } +} +#endif + +#if 0 +TYPED_TEST(MKLDNNConvolutionLayerTest, TestSimple3DConvolution) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + vector bottom_shape(5); + bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); + bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); + bottom_shape[2] = 5; + bottom_shape[3] = this->blob_bottom_vec_[0]->shape(2); + bottom_shape[4] = this->blob_bottom_vec_[0]->shape(3); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(4); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } + +#if 0 // TODO: improve conv so that it runs on all buffers in bottom vector + caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_2_)); + top_data = this->blob_top_2_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +#endif +} +#endif + +#if 0 +TYPED_TEST(MKLDNNConvolutionLayerTest, TestDilated3DConvolution) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + vector bottom_shape(5); + bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); + bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); + bottom_shape[2] = 6; + bottom_shape[3] = 7; + bottom_shape[4] = 8; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_dilation(2); + convolution_param->set_num_output(4); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } + caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_2_)); + top_data = this->blob_top_2_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} +#endif + +TYPED_TEST(MKLDNNConvolutionLayerTest, Test1x1Convolution) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(1); + convolution_param->add_stride(1); + convolution_param->set_num_output(OC); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} + +TYPED_TEST(MKLDNNConvolutionLayerTest, Test1x1ConvolutionReLU) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(1); + convolution_param->add_stride(1); + convolution_param->set_num_output(OC); + convolution_param->set_relu(true); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} + +TYPED_TEST(MKLDNNConvolutionLayerTest, TestSimpleConvolutionGroup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(KH); + convolution_param->add_stride(CS); + convolution_param->set_num_output(OC); + convolution_param->set_group(GR); + convolution_param->add_pad(PD); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} + +TYPED_TEST(MKLDNNConvolutionLayerTest, TestSimpleConvolutionReLUGroup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(OC); + convolution_param->set_relu(true); + convolution_param->set_group(GR); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} + +#if 0 +TYPED_TEST(MKLDNNConvolutionLayerTest, TestSobelConvolution) { + // Test separable convolution by computing the Sobel operator + // as a single filter then comparing the result + // as the convolution of two rectangular filters. + typedef typename TypeParam::Dtype Dtype; + // Fill bottoms with identical Gaussian noise. + shared_ptr > filler; + FillerParameter filler_param; + filler_param.set_value(1.); + filler.reset(new GaussianFiller(filler_param)); + filler->Fill(this->blob_bottom_); + this->blob_bottom_2_->CopyFrom(*this->blob_bottom_); + // Compute Sobel G_x operator as 3 x 3 convolution. + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(1); + convolution_param->set_bias_term(false); + shared_ptr > layer( + new MKLDNNConvolutionLayer(layer_param)); + layer->blobs().resize(1); + layer->blobs()[0].reset(new Blob(1, 3, 3, 3)); + Dtype* weights = layer->blobs()[0]->mutable_cpu_data(); + for (int c = 0; c < 3; ++c) { + int i = c * 9; // 3 x 3 filter + weights[i + 0] = -1; + weights[i + 1] = 0; + weights[i + 2] = 1; + weights[i + 3] = -2; + weights[i + 4] = 0; + weights[i + 5] = 2; + weights[i + 6] = -1; + weights[i + 7] = 0; + weights[i + 8] = 1; + } + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Compute Sobel G_x operator as separable 3 x 1 and 1 x 3 convolutions. + // (1) the [1 2 1] column filter + vector*> sep_blob_bottom_vec; + vector*> sep_blob_top_vec; + shared_ptr > blob_sep(new Blob()); + sep_blob_bottom_vec.push_back(this->blob_bottom_2_); + sep_blob_top_vec.push_back(this->blob_top_2_); + convolution_param->clear_kernel_size(); + convolution_param->clear_stride(); + convolution_param->set_kernel_h(3); + convolution_param->set_kernel_w(1); + convolution_param->set_stride_h(2); + convolution_param->set_stride_w(1); + convolution_param->set_num_output(1); + convolution_param->set_bias_term(false); + layer.reset(new MKLDNNConvolutionLayer(layer_param)); + layer->blobs().resize(1); + layer->blobs()[0].reset(new Blob(1, 3, 3, 1)); + Dtype* weights_1 = layer->blobs()[0]->mutable_cpu_data(); + for (int c = 0; c < 3; ++c) { + int i = c * 3; // 3 x 1 filter + weights_1[i + 0] = 1; + weights_1[i + 1] = 2; + weights_1[i + 2] = 1; + } + layer->SetUp(sep_blob_bottom_vec, sep_blob_top_vec); + layer->Forward(sep_blob_bottom_vec, sep_blob_top_vec); + // (2) the [-1 0 1] row filter + blob_sep->CopyFrom(*this->blob_top_2_, false, true); + sep_blob_bottom_vec.clear(); + sep_blob_bottom_vec.push_back(blob_sep.get()); + convolution_param->set_kernel_h(1); + convolution_param->set_kernel_w(3); + convolution_param->set_stride_h(1); + convolution_param->set_stride_w(2); + convolution_param->set_num_output(1); + convolution_param->set_bias_term(false); + layer.reset(new MKLDNNConvolutionLayer(layer_param)); + layer->blobs().resize(1); + layer->blobs()[0].reset(new Blob(1, 1, 1, 3)); + Dtype* weights_2 = layer->blobs()[0]->mutable_cpu_data(); + weights_2[0] = -1; + weights_2[1] = 0; + weights_2[2] = 1; + layer->SetUp(sep_blob_bottom_vec, sep_blob_top_vec); + layer->Forward(sep_blob_bottom_vec, sep_blob_top_vec); + // Test equivalence of full and separable filters. + const Dtype* top_data = this->blob_top_->cpu_data(); + const Dtype* sep_top_data = this->blob_top_2_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], sep_top_data[i], 1e-4); + } +} +#endif + +#if 0 +TYPED_TEST(MKLDNNConvolutionLayerTest, TestNDAgainst2D) { + typedef typename TypeParam::Dtype Dtype; + const int kernel_h = 11; + const int kernel_w = 13; + vector bottom_shape(4); + bottom_shape[0] = 15; + bottom_shape[1] = 18; + bottom_shape[2] = kernel_h * 2; + bottom_shape[3] = kernel_w * 2; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_num_output(12); + convolution_param->set_bias_term(false); + convolution_param->set_group(6); + convolution_param->set_kernel_h(kernel_h); + convolution_param->set_kernel_w(kernel_w); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + Blob weights; + Blob top_diff; + // Shape and fill weights and top_diff. + bool copy_diff; + bool reshape; + { + MKLDNNConvolutionLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + top_diff.ReshapeLike(*this->blob_top_); + filler.Fill(&top_diff); + ASSERT_EQ(1, layer.blobs().size()); + copy_diff = false; reshape = true; + weights.CopyFrom(*layer.blobs()[0], copy_diff, reshape); + } + vector propagate_down(1, true); + Blob result_2d; + Blob backward_result_2d; + Blob backward_weight_result_2d; + // Test with 2D im2col + { + caffe_set(this->blob_top_->count(), Dtype(0), + this->blob_top_->mutable_cpu_data()); + caffe_set(this->blob_bottom_->count(), Dtype(0), + this->blob_bottom_->mutable_cpu_diff()); + caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); + // Do SetUp and Forward; save Forward result in result_2d. + convolution_param->set_force_nd_im2col(false); + MKLDNNConvolutionLayer layer_2d(layer_param); + layer_2d.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer_2d.blobs().size()); + copy_diff = false; reshape = false; + layer_2d.blobs()[0]->CopyFrom(weights, copy_diff, reshape); + layer_2d.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + copy_diff = false; reshape = true; + result_2d.CopyFrom(*this->blob_top_, copy_diff, reshape); + // Copy pre-generated top diff into actual top diff; + // do Backward and save result in backward_result_2d. + ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer_2d.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + copy_diff = true; reshape = true; + backward_result_2d.CopyFrom(*this->blob_bottom_, copy_diff, reshape); + backward_weight_result_2d.CopyFrom(weights, copy_diff, reshape); + } + Blob result_nd; + Blob backward_result_nd; + Blob backward_weight_result_nd; + // Test with ND im2col + { + caffe_set(this->blob_top_->count(), Dtype(0), + this->blob_top_->mutable_cpu_data()); + caffe_set(this->blob_bottom_->count(), Dtype(0), + this->blob_bottom_->mutable_cpu_diff()); + caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); + // Do SetUp and Forward; save Forward result in result_nd. + convolution_param->set_force_nd_im2col(true); + MKLDNNConvolutionLayer layer_nd(layer_param); + layer_nd.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer_nd.blobs().size()); + copy_diff = false; reshape = false; + layer_nd.blobs()[0]->CopyFrom(weights, copy_diff, reshape); + layer_nd.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + copy_diff = false; reshape = true; + result_nd.CopyFrom(*this->blob_top_, copy_diff, reshape); + // Copy pre-generated top diff into actual top diff; + // do Backward and save result in backward_result_nd. + ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer_nd.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + copy_diff = true; reshape = true; + backward_result_nd.CopyFrom(*this->blob_bottom_, copy_diff, reshape); + backward_weight_result_nd.CopyFrom(weights, copy_diff, reshape); + } + ASSERT_EQ(result_nd.count(), result_2d.count()); + for (int i = 0; i < result_2d.count(); ++i) { + EXPECT_EQ(result_2d.cpu_data()[i], result_nd.cpu_data()[i]); + } + ASSERT_EQ(backward_result_nd.count(), backward_result_2d.count()); + for (int i = 0; i < backward_result_2d.count(); ++i) { + EXPECT_EQ(backward_result_2d.cpu_diff()[i], + backward_result_nd.cpu_diff()[i]); + } + ASSERT_EQ(backward_weight_result_nd.count(), + backward_weight_result_2d.count()); + for (int i = 0; i < backward_weight_result_2d.count(); ++i) { + EXPECT_EQ(backward_weight_result_2d.cpu_diff()[i], + backward_weight_result_nd.cpu_diff()[i]); + } +} +#endif + +TYPED_TEST(MKLDNNConvolutionLayerTest, DISABLED_TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + +// TODO: improve conv so that it runs on all buffers in bottom vector + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + convolution_param->add_kernel_size(KH); + convolution_param->add_stride(CS); + convolution_param->set_num_output(OC); + convolution_param->add_pad(PD); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + MKLDNNConvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +#if 0 +TYPED_TEST(MKLDNNConvolutionLayerTest, TestDilatedGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + vector bottom_shape; + bottom_shape.push_back(2); + bottom_shape.push_back(3); + bottom_shape.push_back(5); + bottom_shape.push_back(6); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + } + convolution_param->add_kernel_size(3); + convolution_param->add_dilation(2); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + MKLDNNConvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} +#endif + +#if 0 +TYPED_TEST(MKLDNNConvolutionLayerTest, TestGradient3D) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + vector bottom_shape(5); + bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); + bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); + bottom_shape[2] = 5; + bottom_shape[3] = this->blob_bottom_vec_[0]->shape(2); + bottom_shape[4] = this->blob_bottom_vec_[0]->shape(3); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + MKLDNNConvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} +#endif + +TYPED_TEST(MKLDNNConvolutionLayerTest, Test1x1Gradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + convolution_param->add_kernel_size(1); + convolution_param->add_stride(1); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + MKLDNNConvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(MKLDNNConvolutionLayerTest, TestGradientGroup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(2); + convolution_param->set_group(GR); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + MKLDNNConvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +} // namespace caffe +#endif // #ifdef MKLDNN_SUPPORTED diff --git a/src/caffe/test/test_mkldnn_inner_product_layer.cpp b/src/caffe/test/test_mkldnn_inner_product_layer.cpp index 084e10a6d..a45d94b32 100644 --- a/src/caffe/test/test_mkldnn_inner_product_layer.cpp +++ b/src/caffe/test/test_mkldnn_inner_product_layer.cpp @@ -165,6 +165,26 @@ TYPED_TEST(MKLDNNInnerProductLayerTest, TestForward) { } } +TYPED_TEST(MKLDNNInnerProductLayerTest, TestForwardNoBias) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_); + LayerParameter layer_param; + InnerProductParameter* inner_product_param = + layer_param.mutable_inner_product_param(); + inner_product_param->set_num_output(10); + inner_product_param->mutable_weight_filler()->set_type("uniform"); + inner_product_param->set_bias_term(false); + shared_ptr > layer( + new MKLDNNInnerProductLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + for (int i = 0; i < count; ++i) { + EXPECT_GE(data[i], 1.); + } +} + // TODO: add support for transposed weights in MKLDNNInnerProduct // layer and then enable following test (check if it was ported properly) #if 0 @@ -273,6 +293,22 @@ TYPED_TEST(MKLDNNInnerProductLayerTest, TestGradient) { this->blob_top_vec_); } +TYPED_TEST(MKLDNNInnerProductLayerTest, TestGradientNoBias) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_); + LayerParameter layer_param; + InnerProductParameter* inner_product_param = + layer_param.mutable_inner_product_param(); + inner_product_param->set_num_output(10); + inner_product_param->mutable_weight_filler()->set_type("gaussian"); + inner_product_param->set_bias_term(false); + shared_ptr > layer( + new MKLDNNInnerProductLayer(layer_param)); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(layer.get(), this->blob_bottom_vec_, + this->blob_top_vec_); +} + TYPED_TEST(MKLDNNInnerProductLayerTest, TestGradientTranspose) { typedef typename TypeParam::Dtype Dtype; this->blob_bottom_vec_.push_back(this->blob_bottom_); diff --git a/src/caffe/test/test_mkldnn_pooling_layer.cpp b/src/caffe/test/test_mkldnn_pooling_layer.cpp index f1d3ff4d9..288c114a6 100644 --- a/src/caffe/test/test_mkldnn_pooling_layer.cpp +++ b/src/caffe/test/test_mkldnn_pooling_layer.cpp @@ -586,7 +586,7 @@ TYPED_TEST(MKLDNNPoolingLayerTest, TestGradientMaxTopMask) { #endif // Average Pooling -TYPED_TEST(MKLDNNPoolingLayerTest, TestForwardAve) { +TYPED_TEST(MKLDNNPoolingLayerTest, DISABLED_TestForwardAve) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); diff --git a/src/caffe/test/test_spp_layer.cpp b/src/caffe/test/test_spp_layer.cpp index 708886e22..66660fccf 100644 --- a/src/caffe/test/test_spp_layer.cpp +++ b/src/caffe/test/test_spp_layer.cpp @@ -78,7 +78,12 @@ class SPPLayerTest : public MultiDeviceTest { blob_bottom_vec_3_.push_back(blob_bottom_3_); blob_top_vec_.push_back(blob_top_); } - virtual ~SPPLayerTest() { delete blob_bottom_; delete blob_top_; } + virtual ~SPPLayerTest() { + delete blob_bottom_; + delete blob_top_; + delete blob_bottom_2_; + delete blob_bottom_3_; + } Blob* const blob_bottom_; Blob* const blob_bottom_2_; diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp old mode 100644 new mode 100755 index f7e0b2ce7..43f07cbf8 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -59,6 +59,7 @@ void im2col_cpu(const Dtype* data_im, const int channels, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_col) { +#if 0 const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int output_w = (width + 2 * pad_w - @@ -89,6 +90,85 @@ void im2col_cpu(const Dtype* data_im, const int channels, } } } +#else + int dil_kernel_h = (kernel_h - 1) * dilation_h + 1; + int dil_kernel_w = (kernel_w - 1) * dilation_w + 1; + int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1; + int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1; + int channels_col = channels * kernel_h * kernel_w; + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (int c = 0; c < channels_col; ++c) { + int w_offset = c % kernel_w; + int h_offset = (c / kernel_w) % kernel_h; + int c_im = c / kernel_h / kernel_w; + + const int hc0 = h_offset * dilation_h - pad_h; + const int wc0 = w_offset * dilation_w - pad_w; + for (int h = 0; h < height_col; ++h) { + int h_pad = h * stride_h + hc0; + + const int row_offset = (c * height_col + h) * width_col; + const int srow_offset = (c_im * height + h_pad) * width; + for (int w = 0; w < width_col; ++w) { + int w_pad = w * stride_w + wc0; + if ((((unsigned)h_pad) < ((unsigned)height)) && (((unsigned)w_pad) < ((unsigned)width))) + data_col[row_offset + w] = data_im[srow_offset + w_pad]; + else { + data_col[row_offset + w] = 0.; + } + } + } + } +#endif +} + +template +void im3d2col_cpu(const Dtype* data_im, const int channels, + const int depth, const int height, const int width, + const int kernel_d, const int kernel_h, const int kernel_w, + const int pad_d, const int pad_h, const int pad_w, + const int stride_d, const int stride_h, const int stride_w, + const int dilation_d, const int dilation_h, const int dilation_w, + Dtype* data_col) { + // LOG(ERROR) << "image size: " << depth << ", " << height << ", " << width; + // LOG(ERROR) << "kernel size: " << kernel_d << ", " << kernel_h << ", " << kernel_w; + + // Implicit dilated kernel size + long dil_kernel_h = (kernel_h - 1) * dilation_h + 1; + long dil_kernel_w = (kernel_w - 1) * dilation_w + 1; + long dil_kernel_d = (kernel_d - 1) * dilation_d + 1; + long height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1; + long width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1; + long depth_col = (depth + 2 * pad_d - dil_kernel_d) / stride_d + 1; + long channels_col = channels * kernel_h * kernel_w * kernel_d; + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (long c = 0; c < channels_col; ++c) { + long w_offset = c % kernel_w; + long h_offset = (c / kernel_w) % kernel_h; + long d_offset = (c / kernel_w / kernel_h) % kernel_d; + long c_im = c / kernel_h / kernel_w / kernel_d; + for (int d = 0; d < depth_col; ++d) { + long d_pad = d * stride_d - pad_d + d_offset * dilation_d; + for (long h = 0; h < height_col; ++h) { + long h_pad = h * stride_h - pad_h + h_offset * dilation_h; + for (long w = 0; w < width_col; ++w) { + long w_pad = w * stride_w - pad_w + w_offset * dilation_w; + if (((unsigned long)h_pad < (unsigned long)height) && + ((unsigned long)w_pad < (unsigned long)width) && + ((unsigned long)d_pad < (unsigned long)depth)) { + data_col[((c * depth_col + d) * height_col + h) * width_col + w] = + data_im[((c_im * depth + d_pad) * height + h_pad) * width + w_pad]; + } else { + data_col[((c * depth_col + d) * height_col + h) * width_col + w] = 0.; + } + } + } + } + } } // Explicit instantiation @@ -102,6 +182,20 @@ template void im2col_cpu(const double* data_im, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, double* data_col); +template void im3d2col_cpu(const float* data_im, const int channels, + const int depth, const int height, const int width, + const int kernel_d, const int kernel_h, const int kernel_w, + const int pad_d, const int pad_h, const int pad_w, + const int stride_d, const int stride_h, const int stride_w, + const int dilation_d, const int dilation_h, const int dilation_w, + float* data_col); +template void im3d2col_cpu(const double* data_im, const int channels, + const int depth, const int height, const int width, + const int kernel_d, const int kernel_h, const int kernel_w, + const int pad_d, const int pad_h, const int pad_w, + const int stride_d, const int stride_h, const int stride_w, + const int dilation_d, const int dilation_h, const int dilation_w, + double* data_col); template inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, @@ -203,6 +297,7 @@ void col2im_cpu(const Dtype* data_col, const int channels, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_im) { +#if 0 caffe_set(height * width * channels, Dtype(0), data_im); const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; @@ -231,6 +326,93 @@ void col2im_cpu(const Dtype* data_col, const int channels, } } } +#else + int dil_patch_h = (kernel_h - 1) * dilation_h + 1; + int dil_patch_w = (kernel_w - 1) * dilation_w + 1; + int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1; + int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1; + long chunk_len = kernel_h * kernel_w; + + caffe_set(height * width * channels, Dtype(0), data_im); + + #ifdef _OPENMP + #pragma omp parallel for if (channels > 1) + #endif + for (int idx = 0; idx < channels; ++idx) { + for (int inner_idx = 0; inner_idx < chunk_len; ++inner_idx) { + int c = idx * chunk_len + inner_idx; + int w_offset = c % kernel_w; + int h_offset = (c / kernel_w) % kernel_h; + int c_im = c / kernel_h / kernel_w; + + const int hc0 = h_offset * dilation_h - pad_h; + const int wc0 = w_offset * dilation_w - pad_w; + for (int h = 0; h < height_col; ++h) { + for (int w = 0; w < width_col; ++w) { + int h_pad = h * stride_h + hc0; + const int srow_offset = (c_im * height + h_pad) * width; + const int row_offset = (c * height_col + h) * width_col; + int w_pad = w * stride_w + wc0; + if ((((unsigned)h_pad) < ((unsigned)height)) && (((unsigned)w_pad) < ((unsigned)width))) { + data_im[srow_offset + w_pad] += data_col[row_offset + w]; + } + } + } + } + } +#endif +} + +template +void col2im3d_cpu(const Dtype* data_col, const int channels, + const int depth, const int height, const int width, + const int kernel_d, const int kernel_h, const int kernel_w, + const int pad_d, const int pad_h, const int pad_w, + const int stride_d, const int stride_h, const int stride_w, + const int dilation_d, const int dilation_h, const int dilation_w, + Dtype* data_im) { + // Implicit dilated patch + long dil_patch_h = (kernel_h - 1) * dilation_h + 1; + long dil_patch_w = (kernel_w - 1) * dilation_w + 1; + long dil_patch_d = (kernel_d - 1) * dilation_d + 1; + long height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1; + long width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1; + long depth_col = (depth + 2 * pad_d - dil_patch_d) / stride_d + 1; + long num_kernels = channels * height * width * depth; + long chunk_len = kernel_h * kernel_w * kernel_d; + + caffe_set(num_kernels, Dtype(0), data_im); + + #ifdef _OPENMP + #pragma omp parallel for if (channels > 1) + #endif + for (long c_im = 0; c_im < channels; ++c_im) { + for (long c = c_im * chunk_len; c < chunk_len * (c_im + 1); ++c) { + long w_offset = c % kernel_w; + long h_offset = (c / kernel_w) % kernel_h; + long d_offset = (c / kernel_w / kernel_h) % kernel_d; + + long dc0 = d_offset * dilation_d - pad_d; + long hc0 = h_offset * dilation_h - pad_h; + long wc0 = w_offset * dilation_w - pad_w; + for (long d = 0; d < depth_col; ++d) { + long d_pad = d * stride_d + dc0; + for (long h = 0; h < height_col; ++h) { + long h_pad = h * stride_h + hc0; + for (long w = 0; w < width_col; ++w) { + long w_pad = w * stride_w + wc0; + + if (((unsigned long)h_pad < (unsigned long)height) && + ((unsigned long)w_pad < (unsigned long)width) && + ((unsigned long)d_pad < (unsigned long)depth)) { + data_im[((c_im * depth + d_pad) * height + h_pad) * width + w_pad] += + data_col[((c * depth_col + d) * height_col + h) * width_col + w]; + } + } + } + } + } + } } // Explicit instantiation @@ -244,6 +426,20 @@ template void col2im_cpu(const double* data_col, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, double* data_im); +template void col2im3d_cpu(const float* data_col, const int channels, + const int depth, const int height, const int width, + const int kernel_d, const int kernel_h, const int kernel_w, + const int pad_d, const int pad_h, const int pad_w, + const int stride_d, const int stride_h, const int stride_w, + const int dilation_d, const int dilation_h, const int dilation_w, + float* data_im); +template void col2im3d_cpu(const double* data_col, const int channels, + const int depth, const int height, const int width, + const int kernel_d, const int kernel_h, const int kernel_w, + const int pad_d, const int pad_h, const int pad_w, + const int stride_d, const int stride_h, const int stride_w, + const int dilation_d, const int dilation_h, const int dilation_w, + double* data_im); template void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes,