task-mkl2017-layers-reshape-fixes:
- ForwardReshapeForward ULT was limited to MKL2017 engine
jczaja authored and ptbuilder committed Oct 28, 2016
2 parents b57b3bf + 8ddb521 commit 7c6249d
Showing 16 changed files with 595 additions and 208 deletions.
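The common thread in the diffs below: each MKL2017 layer gains an Init() helper that creates (or re-creates) its MKL layouts and primitives, LayerSetUp() simply delegates to it, and Reshape() re-runs it whenever the bottom blob's geometry actually changes. A minimal sketch of that idiom, assuming a simplified layer with cached num_/channels_/height_/width_ members (the concrete version for MKLBatchNormLayer appears in mkl_batch_norm_layer.cpp below; SomeMKLLayer is illustrative only):

// Sketch only -- SomeMKLLayer is not part of this commit; per-layer details differ.
template <typename Dtype>
void SomeMKLLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                                     const vector<Blob<Dtype>*>& top) {
  Init(bottom, top);  // all layout/primitive creation lives in Init()
}

template <typename Dtype>
void SomeMKLLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
                                  const vector<Blob<Dtype>*>& top) {
  bool reshaping = num_      != bottom[0]->num()      ||
                   channels_ != bottom[0]->channels() ||
                   height_   != bottom[0]->height()   ||
                   width_    != bottom[0]->width();
  num_      = bottom[0]->num();
  channels_ = bottom[0]->channels();
  height_   = bottom[0]->height();
  width_    = bottom[0]->width();
  top[0]->Reshape(num_, channels_, height_, width_);
  if (reshaping) {
    Init(bottom, top);  // rebuild MKL layouts/primitives for the new geometry
  }
}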
38 changes: 34 additions & 4 deletions include/caffe/layers/mkl_layers.hpp
@@ -73,8 +73,8 @@ class MKLConvolutionLayer : public ConvolutionLayer<Dtype> {
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom);
// Customized methods
void Init( const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
@@ -169,6 +169,9 @@ class MKLLRNLayer : public Layer<Dtype> {
virtual void CrossChannelBackward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

int size_;
int pre_pad_;
Dtype alpha_;
@@ -205,6 +208,9 @@ class MKLPoolingLayer : public Layer<Dtype> {
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "Pooling"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
@@ -270,6 +276,12 @@ class MKLReLULayer : public NeuronLayer<Dtype> {
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "ReLU"; }

protected:
@@ -291,6 +303,8 @@ class MKLReLULayer : public NeuronLayer<Dtype> {
shared_ptr<MKLDiff<Dtype> > bwd_top_diff_;
shared_ptr<MKLDiff<Dtype> > bwd_bottom_diff_;
dnnPrimitive_t reluFwd_, reluBwd_;
vector<size_t> sizes_;
vector<size_t> strides_;
};

template <typename Dtype>
@@ -301,8 +315,8 @@ class MKLConcatLayer : public Layer<Dtype> {
concatFwd_(static_cast<dnnPrimitive_t>(NULL)),
concatBwd_(static_cast<dnnPrimitive_t>(NULL)),
fwd_top_data_ (new MKLData<Dtype>()),
bwd_top_diff_ (new MKLDiff<Dtype>()) {
}
bwd_top_diff_ (new MKLDiff<Dtype>()),
split_channels_ (NULL) { }
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
@@ -323,6 +337,9 @@ class MKLConcatLayer : public Layer<Dtype> {
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom);

void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

private:
dnnPrimitive_t concatFwd_;
dnnPrimitive_t concatBwd_;
@@ -375,6 +392,9 @@ class MKLBatchNormLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

// Dtype moving_average_fraction_;
Dtype eps_;
bool use_weight_bias_;
@@ -410,6 +430,9 @@ class MKLSplitLayer : public Layer<Dtype> {
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "Split"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
@@ -429,6 +452,8 @@ class MKLSplitLayer : public Layer<Dtype> {
vector<shared_ptr<MKLDiff<Dtype> > > bwd_top_diff;
vector<Dtype> coeffs_;
size_t num_tops;
vector<size_t> sizes_src_;
vector<size_t> strides_src_;
dnnPrimitive_t sumPrimitive;
};

@@ -446,6 +471,9 @@ class MKLEltwiseLayer : public Layer<Dtype> {
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "Eltwise"; }
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
@@ -472,6 +500,8 @@ class MKLEltwiseLayer : public Layer<Dtype> {
vector<Dtype> coeffs_;
Blob<int> max_idx_;
size_t num_bottoms;
int channels_, num_;
int height_, width_;

bool stable_prod_grad_;
};
8 changes: 7 additions & 1 deletion include/caffe/mkl_memory.hpp
@@ -96,11 +96,15 @@ struct MKLMemoryDescriptorBase : PrvMemDescr,
void create_internal_layout(const dnnPrimitive_t primitive,
dnnResourceType_t type);
void create_user_layout(size_t dimension, const size_t size[],
const size_t strides[]);
const size_t strides[],
bool create_conversion_if_possible = true);
void create_layouts(
const dnnPrimitive_t primitive, dnnResourceType_t type,
size_t dimension, const size_t size[], const size_t strides[]);

void remove_internal_layout();
void remove_user_layout();

virtual PrvDescrType get_descr_type() {return PRV_DESCR_MKL2017;}
virtual size_t prv_size() {
return dnnLayoutGetMemorySize<Dtype>(layout_int);
@@ -112,6 +116,8 @@ struct MKLMemoryDescriptorBase : PrvMemDescr,
virtual void convert_to_prv(void* cpu_ptr);
virtual bool layout_compare(shared_ptr<PrvMemDescr> other);
virtual void convert_from_other(shared_ptr<PrvMemDescr> other);
protected:
void remove_conversions();
protected:
Dtype* internal_ptr;
};
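The new create_conversion_if_possible flag on create_user_layout() lets a layer register its user layout without immediately building a conversion primitive; as the comment in mkl_batch_norm_layer.cpp below notes, the conversion is created lazily once the neighbouring layers' layouts are known. The remove_internal_layout()/remove_user_layout() helpers exist so a re-run of Init() can drop stale layouts first. A hedged sketch of the intended call pattern (the dimension values and their W, H, C, N ordering are assumptions, not taken from this diff):

// Sketch only -- dimensions are illustrative.
size_t dim        = 4;
size_t sizes[4]   = {27, 27, 96, 32};                // W, H, C, N of the bottom blob (assumed ordering)
size_t strides[4] = {1, 27, 27 * 27, 27 * 27 * 96};  // dense strides matching sizes[]
// Defer conversion creation until the neighbours' internal layouts are known:
fwd_bottom_data->create_user_layout(dim, sizes, strides, false);
// On a later re-Init() the stale layouts could be dropped first, e.g.:
// fwd_bottom_data->remove_user_layout();
// fwd_bottom_data->remove_internal_layout();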
50 changes: 36 additions & 14 deletions include/mkl_dnn_cppwrapper.h
@@ -101,28 +101,44 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{return dnnAllocateBuffer_F64(pPtr, layout);}

TEMPLATE_PREFIX dnnError_t dnnReleaseBuffer(
void *ptr);
void* ptr);
SPEC_PREFIX dnnError_t dnnReleaseBuffer<float>(
void *ptr)
{return dnnReleaseBuffer_F32(ptr);}
void* ptr) {
dnnError_t status = E_SUCCESS;
if( ptr != NULL) {
status = dnnReleaseBuffer_F32(ptr);
}
return status;
}
SPEC_PREFIX dnnError_t dnnReleaseBuffer<double>(
void *ptr)
{return dnnReleaseBuffer_F64(ptr);}
void* ptr) {
dnnError_t status = E_SUCCESS;
if( ptr != NULL) {
status = dnnReleaseBuffer_F64(ptr);
}
return status;
}

TEMPLATE_PREFIX dnnError_t dnnLayoutDelete(
dnnLayout_t& layout);
SPEC_PREFIX dnnError_t dnnLayoutDelete<float>(
dnnLayout_t& layout) {
dnnError_t status = dnnLayoutDelete_F32(layout);
layout = NULL;
dnnError_t status = E_SUCCESS;
if( layout != NULL) {
status = dnnLayoutDelete_F32(layout);
layout = NULL;
}
return status;
}
SPEC_PREFIX dnnError_t dnnLayoutDelete<double>(
dnnLayout_t& layout) {
dnnError_t status = dnnLayoutDelete_F64(layout);
layout = NULL;
dnnError_t status = E_SUCCESS;
if( layout != NULL) {
status = dnnLayoutDelete_F64(layout);
layout = NULL;
}
return status;
}
}

TEMPLATE_PREFIX dnnError_t dnnPrimitiveAttributesCreate(
dnnPrimitiveAttributes_t *attributes);
@@ -186,14 +202,20 @@ TEMPLATE_PREFIX dnnError_t dnnDelete(
dnnPrimitive_t& primitive);
SPEC_PREFIX dnnError_t dnnDelete<float>(
dnnPrimitive_t& primitive) {
dnnError_t status = dnnDelete_F32(primitive);
primitive = NULL;
dnnError_t status = E_SUCCESS;
if (primitive != NULL) {
status = dnnDelete_F32(primitive);
primitive = NULL;
}
return status;
}
SPEC_PREFIX dnnError_t dnnDelete<double>(
dnnPrimitive_t& primitive) {
dnnError_t status = dnnDelete_F64(primitive);
primitive = NULL;
dnnError_t status = E_SUCCESS;
if (primitive != NULL) {
status = dnnDelete_F64(primitive);
primitive = NULL;
}
return status;
}
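These wrappers are now idempotent: deleting or releasing a NULL handle is a no-op that returns E_SUCCESS, and a successful deletion resets the handle to NULL. That is what lets the MKL layers call them unconditionally from both Init() and the destructor (see mkl_batch_norm_layer.cpp below) without tracking whether a primitive was ever created. A small usage sketch of the convention this enables:

// Sketch of the calling convention the guarded wrappers allow.
dnnPrimitive_t convFwd   = NULL;    // primitive not created yet
dnnLayout_t    layoutUsr = NULL;
dnnDelete<float>(convFwd);          // no-op, returns E_SUCCESS
dnnLayoutDelete<float>(layoutUsr);  // likewise a no-op
// After the primitive/layout are actually created, the same calls delete
// them and reset the handles to NULL, so repeating the calls (e.g. once
// from a re-Init() and once from the destructor) stays safe.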

11 changes: 8 additions & 3 deletions src/caffe/blob.cpp
@@ -68,19 +68,24 @@ void Blob<Dtype>::Reshape(const vector<int>& shape) {
}
int* shape_data = static_cast<int*>(shape_data_->mutable_cpu_data());
#endif

bool actual_reshaping = false;
for (int i = 0; i < shape.size(); ++i) {
CHECK_GE(shape[i], 0);
if (count_ != 0) {
CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX";
}
count_ *= shape[i];
shape_[i] = shape[i];
if (shape_[i] != shape[i]) {
actual_reshaping = true;
shape_[i] = shape[i];
}
#ifndef CPU_ONLY
shape_data[i] = shape[i];
#endif
}
if (count_ > capacity_) {
// We restart sync objects when there was a change of shape or the
// requested count is bigger than the current capacity
if ( (actual_reshaping == true) || (count_ > capacity_) ) {
capacity_ = count_;
data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
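The effect of the actual_reshaping flag: data_ and diff_ are now re-created not only when count_ outgrows capacity_, but whenever any dimension actually changes, so a blob reshaped to a new geometry of the same size no longer keeps SyncedMemory set up for the old shape. A hedged illustration (shapes are arbitrary):

// Illustration only; shapes are arbitrary.
Blob<float> blob(32, 96, 27, 27);  // N, C, H, W
blob.Reshape(32, 96, 27, 27);      // identical shape: no reallocation
blob.Reshape(96, 32, 27, 27);      // same count, different shape: data_/diff_ are reset now
blob.Reshape(64, 96, 27, 27);      // larger count: reset, as before this change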
2 changes: 1 addition & 1 deletion src/caffe/layers/inner_product_layer.cpp
@@ -183,6 +183,6 @@ STUB_GPU(InnerProductLayer);

INSTANTIATE_CLASS(InnerProductLayer);

//REGISTER_LAYER_CLASS(InnerProduct);
// REGISTER_LAYER_CLASS(InnerProduct);

} // namespace caffe
56 changes: 40 additions & 16 deletions src/caffe/layers/mkl_batch_norm_layer.cpp
@@ -47,17 +47,17 @@ namespace caffe {

template <typename Dtype>
MKLBatchNormLayer<Dtype>::~MKLBatchNormLayer() {
if (batchNormFwd != NULL) dnnDelete<Dtype>(batchNormFwd);
if (batchNormBwdData != NULL) dnnDelete<Dtype>(batchNormBwdData);
if (batchNormBwdScaleShift != NULL) dnnDelete<Dtype>(batchNormBwdScaleShift);
dnnDelete<Dtype>(batchNormFwd);
dnnDelete<Dtype>(batchNormBwdData);
dnnDelete<Dtype>(batchNormBwdScaleShift);

dnnLayoutDelete<Dtype>(layout_usr_);
dnnReleaseBuffer<Dtype>(workspace_buffer_);
dnnReleaseBuffer<Dtype>(scaleShift_buffer_);
}

template <typename Dtype>
void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
void MKLBatchNormLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
eps_ = this->layer_param_.batch_norm_param().eps();
use_weight_bias_ = this->layer_param_.batch_norm_param().use_weight_bias();
@@ -93,24 +93,28 @@ void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
bwd_bottom_diff->name = "bwd_bottom_diff @ " + this->layer_param_.name();
bwd_top_diff->name = "bwd_top_diff @ " + this->layer_param_.name();

// TODO: Make a cleanup routine to avoid
// copy of following code in the Destructor

dnnError_t e;
dnnLayoutDelete<Dtype>(layout_usr_);
e = dnnLayoutCreate<Dtype>(&layout_usr_, dim, sizes, strides);
CHECK_EQ(e, E_SUCCESS);

fwd_bottom_data->create_user_layout(dim, sizes, strides);
fwd_top_data ->create_user_layout(dim, sizes, strides);
bwd_bottom_diff->create_user_layout(dim, sizes, strides);
bwd_top_diff ->create_user_layout(dim, sizes, strides);
fwd_bottom_data->create_user_layout(dim, sizes, strides, false);
fwd_top_data ->create_user_layout(dim, sizes, strides, false);
bwd_bottom_diff->create_user_layout(dim, sizes, strides, false);
bwd_top_diff ->create_user_layout(dim, sizes, strides, false);

workspace_buffer_ = NULL;
scaleShift_buffer_ = NULL;
dnnReleaseBuffer<Dtype>(workspace_buffer_);
dnnReleaseBuffer<Dtype>(scaleShift_buffer_);
// "Lazy" allocation because here we don't know
// what layout is used by neighbours.

// Primitives will be allocated during the first fwd pass
batchNormFwd = NULL;
batchNormBwdData = NULL;
batchNormBwdScaleShift = NULL;
dnnDelete<Dtype>(batchNormFwd);
dnnDelete<Dtype>(batchNormBwdData);
dnnDelete<Dtype>(batchNormBwdScaleShift);

if (use_weight_bias_) {
if ( bias_term_ ) {
@@ -147,10 +151,23 @@ void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
}
}

template <typename Dtype>
void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
Init(bottom, top);
}

template <typename Dtype>
void MKLBatchNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
bool reshaping = true;
if ((num_ == bottom[0]->num()) &&
channels_ == bottom[0]->channels() &&
height_ == bottom[0]->height() &&
width_ == bottom[0]->width()) {
reshaping = false;
}

if (bottom[0] == top[0]) { // in-place computation
temp_.ReshapeLike(*bottom[0]);
} else {
@@ -160,6 +177,10 @@ void MKLBatchNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
num_ = bottom[0]->num();
top[0]->Reshape(num_, channels_, height_, width_);
}

if (reshaping == true) {
Init(bottom, top);
}
}

template <typename Dtype>
@@ -302,13 +323,16 @@ void MKLBatchNormLayer<Dtype>::Backward_cpu(
const vector<Blob<Dtype>*>& bottom) {
void *bottom_data = NULL;
if (bottom[0] == top[0]) {
bottom_data = reinterpret_cast<void *>(const_cast<Dtype*>(temp_.cpu_data()));
bottom_data = reinterpret_cast<void *>(
const_cast<Dtype*>(temp_.cpu_data()));
} else {
bottom_data =
reinterpret_cast<void *>(const_cast<Dtype*>(bottom[0]->prv_data()));
reinterpret_cast<void *>(
const_cast<Dtype*>(bottom[0]->prv_data()));
if (NULL == bottom_data)
bottom_data =
reinterpret_cast<void *>(const_cast<Dtype*>(bottom[0]->cpu_data()));
reinterpret_cast<void *>(
const_cast<Dtype*>(bottom[0]->cpu_data()));
}

dnnError_t e;
(Diffs for the remaining 10 changed files were not loaded.)
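The commit title refers to the ForwardReshapeForward unit tests, which forward a layer, reshape its input, and forward it again; the Reshape()-triggered re-Init() shown above is what makes that sequence safe for the MKL2017 layers. A hedged sketch of the pattern such a test exercises (layer choice, shapes, and setup are illustrative; the actual test code is not among the files shown here):

// Sketch of a ForwardReshapeForward-style sequence (not the actual ULT).
LayerParameter layer_param;                  // engine/params assumed configured elsewhere
vector<Blob<float>*> bottom(1, new Blob<float>(2, 3, 8, 8));
vector<Blob<float>*> top(1, new Blob<float>());
MKLReLULayer<float> layer(layer_param);
layer.SetUp(bottom, top);
layer.Forward(bottom, top);                  // first pass builds the MKL primitives
bottom[0]->Reshape(2, 3, 16, 16);            // change the spatial dimensions
layer.Reshape(bottom, top);                  // re-runs Init() for the new geometry
layer.Forward(bottom, top);                  // must not reuse layouts built for 8x8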
