From 685d511bdd1f4ed561ee16b17c1d57c527ab9f31 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Tue, 7 Jun 2016 19:11:32 +0200
Subject: [PATCH 01/12] Fix to Reshape of MKL2017 layers: Relu, Pooling,
 Split, Eltwise

- cosmetic changes in names
- Added script performing comparison on googlenet network
- fixed number of training iterations to 1
- dropout cancellation (fixed ratio to 0) and mirroring disabled
- Added new script for comparison
- Removed specialized scripts
- Added draft for checking if reshape works
- Removed some unneeded files
- Fix to Reshape of Relu

Desc:
- Relu reshape implemented
- Modified mkl_memory to release allocation of prv data when internal
  layout is recreated
- Fix to pooling reshape + layout recreation
- Fix to reshape of Eltwise Layer
- Fix to reshape of MKLSplit Layer
- Added creation of conversions if they are not available in
  convert_from_prv
- removed unneeded file
---
 include/caffe/layers/mkl_layers.hpp    |  21 +++++
 include/caffe/mkl_memory.hpp           |   5 +-
 include/mkl_dnn_cppwrapper.h           |  14 ++--
 src/caffe/layers/mkl_eltwise_layer.cpp |  58 +++++++++----
 src/caffe/layers/mkl_pooling_layer.cpp | 111 +++++++++----------------
 src/caffe/layers/mkl_relu_layer.cpp    |  70 ++++++++++++----
 src/caffe/layers/mkl_split_layer.cpp   |  50 +++++++++--
 src/caffe/mkl_memory.cpp               |  60 +++++++++----
 8 files changed, 258 insertions(+), 131 deletions(-)

diff --git a/include/caffe/layers/mkl_layers.hpp b/include/caffe/layers/mkl_layers.hpp
index 32ce6e622..c75f48951 100644
--- a/include/caffe/layers/mkl_layers.hpp
+++ b/include/caffe/layers/mkl_layers.hpp
@@ -205,6 +205,9 @@ class MKLPoolingLayer : public Layer<Dtype> {
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
 
+  void Init( const vector<Blob<Dtype>*>& bottom,
+             const vector<Blob<Dtype>*>& top);
+
   virtual inline const char* type() const { return "Pooling"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int MinTopBlobs() const { return 1; }
@@ -270,6 +273,12 @@ class MKLReLULayer : public NeuronLayer<Dtype> {
   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
 
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  void Init( const vector<Blob<Dtype>*>& bottom,
+             const vector<Blob<Dtype>*>& top);
+
   virtual inline const char* type() const { return "ReLU"; }
 
  protected:
@@ -291,6 +300,8 @@ class MKLReLULayer : public NeuronLayer<Dtype> {
   shared_ptr<MKLDiff<Dtype> > bwd_top_diff_;
   shared_ptr<MKLDiff<Dtype> > bwd_bottom_diff_;
   dnnPrimitive_t reluFwd_, reluBwd_;
+  vector<size_t> sizes_;
+  vector<size_t> strides_;
 };
 
 template <typename Dtype>
@@ -410,6 +421,9 @@ class MKLSplitLayer : public Layer<Dtype> {
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
 
+  void Init( const vector<Blob<Dtype>*>& bottom,
+             const vector<Blob<Dtype>*>& top);
+
   virtual inline const char* type() const { return "Split"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int MinTopBlobs() const { return 1; }
@@ -429,6 +443,8 @@ class MKLSplitLayer : public Layer<Dtype> {
   vector<shared_ptr<MKLDiff<Dtype> > > bwd_top_diff;
   vector<Dtype> coeffs_;
   size_t num_tops;
+  vector<size_t> sizes_src_;
+  vector<size_t> strides_src_;
   dnnPrimitive_t sumPrimitive;
 };
 
@@ -446,6 +462,9 @@ class MKLEltwiseLayer : public Layer<Dtype> {
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
 
+  void Init( const vector<Blob<Dtype>*>& bottom,
+             const vector<Blob<Dtype>*>& top);
+
   virtual inline const char* type() const { return "Eltwise"; }
   virtual inline int MinBottomBlobs() const { return 2; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
@@ -472,6 +491,8 @@ class MKLEltwiseLayer : public Layer<Dtype> {
   vector<Dtype> coeffs_;
   Blob<int> max_idx_;
   size_t num_bottoms;
+  int channels_, num_;
+  int height_, width_;
 
   bool stable_prod_grad_;
 };
diff --git a/include/caffe/mkl_memory.hpp b/include/caffe/mkl_memory.hpp
index a785c1e29..75739b3eb 100644
--- a/include/caffe/mkl_memory.hpp
+++ b/include/caffe/mkl_memory.hpp
@@ -96,7 +96,8 @@ struct MKLMemoryDescriptorBase : PrvMemDescr,
   void create_internal_layout(const dnnPrimitive_t primitive,
                               dnnResourceType_t type);
   void create_user_layout(size_t dimension, const size_t size[],
-                          const size_t strides[]);
+                          const size_t strides[],
+                          bool create_conversion_if_possible = true);
   void create_layouts(
     const dnnPrimitive_t primitive, dnnResourceType_t type,
     size_t dimension, const size_t size[], const size_t strides[]);
@@ -112,6 +113,8 @@ struct MKLMemoryDescriptorBase : PrvMemDescr,
   virtual void convert_to_prv(void* cpu_ptr);
   virtual bool layout_compare(shared_ptr<PrvMemDescr> other);
   virtual void convert_from_other(shared_ptr<PrvMemDescr> other);
+ protected:
+  void remove_conversions();
 protected:
   Dtype* internal_ptr;
 };
diff --git a/include/mkl_dnn_cppwrapper.h b/include/mkl_dnn_cppwrapper.h
index 9e23a6155..36be959e4 100644
--- a/include/mkl_dnn_cppwrapper.h
+++ b/include/mkl_dnn_cppwrapper.h
@@ -101,13 +101,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     {return dnnAllocateBuffer_F64(pPtr, layout);}
 
 TEMPLATE_PREFIX dnnError_t dnnReleaseBuffer(
-  void *ptr);
+  void* ptr);
 SPEC_PREFIX dnnError_t dnnReleaseBuffer<float>(
-  void *ptr)
-  {return dnnReleaseBuffer_F32(ptr);}
+  void* ptr) {
+    dnnError_t status = dnnReleaseBuffer_F32(ptr);
+    return status;
+  }
 SPEC_PREFIX dnnError_t dnnReleaseBuffer<double>(
-  void *ptr)
-  {return dnnReleaseBuffer_F64(ptr);}
+  void* ptr) {
+    dnnError_t status = dnnReleaseBuffer_F64(ptr);
+    return status;
+  }
 
 TEMPLATE_PREFIX dnnError_t dnnLayoutDelete(
   dnnLayout_t& layout);
diff --git a/src/caffe/layers/mkl_eltwise_layer.cpp b/src/caffe/layers/mkl_eltwise_layer.cpp
index 1c653cf2d..d6d1eb73a 100644
--- a/src/caffe/layers/mkl_eltwise_layer.cpp
+++ b/src/caffe/layers/mkl_eltwise_layer.cpp
@@ -50,19 +50,13 @@ MKLEltwiseLayer<Dtype>::~MKLEltwiseLayer() {
 }
 
 template <typename Dtype>
-void MKLEltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
-    const vector<Blob<Dtype>*>& top) {
-  CHECK(this->layer_param().eltwise_param().coeff_size() == 0
-      || this->layer_param().eltwise_param().coeff_size() == bottom.size()) <<
-      "MKLEltwise Layer takes one coefficient per bottom blob.";
-  CHECK(!(this->layer_param().eltwise_param().operation()
-      == EltwiseParameter_EltwiseOp_PROD
-      && this->layer_param().eltwise_param().coeff_size())) <<
-      "MKLEltwise layer only takes coefficients for summation.";
-
-  CHECK(this->layer_param().eltwise_param().operation() ==
-    EltwiseParameter_EltwiseOp_SUM)
-      << "MKLEltwise Layer only process summation.";
+void MKLEltwiseLayer<Dtype>::Init( const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+
+  channels_ = bottom[0]->channels();
+  height_ = bottom[0]->height();
+  width_ = bottom[0]->width();
+  num_ = bottom[0]->num();
 
   op_ = this->layer_param_.eltwise_param().operation();
   // Blob-wise coefficients for the elementwise operation.
@@ -88,16 +82,38 @@ void MKLEltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     bwd_bottom_diff.push_back(
         shared_ptr<MKLDiff<Dtype> >(new MKLDiff<Dtype>));
     CHECK_EQ(dim_src, bottom[i]->shape().size());
-    fwd_bottom_data[i]->create_user_layout(dim_src, sizes_src, strides_src);
-    bwd_bottom_diff[i]->create_user_layout(dim_src, sizes_src, strides_src);
+    fwd_bottom_data[i]->create_user_layout(dim_src, sizes_src, strides_src,false);
+    bwd_bottom_diff[i]->create_user_layout(dim_src, sizes_src, strides_src,false);
   }
 
-  fwd_top_data->create_user_layout(dim_src, sizes_src, strides_src);
+  fwd_top_data->create_user_layout(dim_src, sizes_src, strides_src,false);
+
+  dnnDelete<Dtype>(sumPrimitive);
+}
+
+
+template <typename Dtype>
+void MKLEltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  CHECK(this->layer_param().eltwise_param().coeff_size() == 0
+      || this->layer_param().eltwise_param().coeff_size() == bottom.size()) <<
+      "MKLEltwise Layer takes one coefficient per bottom blob.";
+  CHECK(!(this->layer_param().eltwise_param().operation()
+      == EltwiseParameter_EltwiseOp_PROD
+      && this->layer_param().eltwise_param().coeff_size())) <<
+      "MKLEltwise layer only takes coefficients for summation.";
+
+  CHECK(this->layer_param().eltwise_param().operation() ==
+    EltwiseParameter_EltwiseOp_SUM)
+      << "MKLEltwise Layer only process summation.";
+
+  Init(bottom,top);
 }
 
 template <typename Dtype>
 void MKLEltwiseLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
+
   for (int i = 1; i < bottom.size(); ++i) {
     CHECK(bottom[i]->shape() == bottom[0]->shape());
   }
@@ -107,6 +123,16 @@ void MKLEltwiseLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     EltwiseParameter_EltwiseOp_MAX && top.size() == 1) {
     max_idx_.Reshape(bottom[0]->shape());
   }
+
+  if (channels_ == bottom[0]->channels() &&
+      height_ == bottom[0]->height() &&
+      width_ == bottom[0]->width() &&
+      num_ == bottom[0]->num() &&
+      num_bottoms == bottom.size()) {
+    return;
+  }
+
+  Init(bottom,top);
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/mkl_pooling_layer.cpp b/src/caffe/layers/mkl_pooling_layer.cpp
index 4dd3c0723..a49a3a111 100644
--- a/src/caffe/layers/mkl_pooling_layer.cpp
+++ b/src/caffe/layers/mkl_pooling_layer.cpp
@@ -55,8 +55,10 @@ MKLPoolingLayer<Dtype>::~MKLPoolingLayer() {
 }
 
 template <typename Dtype>
-void MKLPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
-    const vector<Blob<Dtype>*>& top) {
+void MKLPoolingLayer<Dtype>::Init(
+    const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top)
+{
   PoolingParameter pool_param = this->layer_param_.pooling_param();
 
   channels_ = bottom[0]->channels();
@@ -127,7 +129,7 @@ void MKLPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   pooled_height_ = static_cast<int>(ceil(static_cast<float>(
       bottom[0]->height() + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
   pooled_width_ = static_cast<int>(ceil(static_cast<float>(
-      bottom[0]->height() + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
+      bottom[0]->width() + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
   if (pad_h_ || pad_w_) {
     // If we have padding, ensure that the last pooling starts strictly
     // inside the image (instead of at the padding); otherwise clip the last.
@@ -141,6 +143,24 @@ void MKLPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     CHECK_LT((pooled_width_ - 1) * stride_w_, bottom[0]->height() + pad_w_);
   }
 
+  top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
+      pooled_width_);
+  if (top.size() > 1) {
+    (reinterpret_cast<Blob<size_t>* > (top[1]) )->Reshape(bottom[0]->num(),
+        channels_, pooled_height_, pooled_width_);
+  }
+  // If max/min/avg pooling, we will initialize the vector index part.
+  if (top.size() == 1) {
+    max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
+        pooled_width_);
+  }
+  // If stochastic pooling, we will initialize the random index part.
+  if (this->layer_param_.pooling_param().pool() ==
+      PoolingParameter_PoolMethod_STOCHASTIC) {
+    rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
+        pooled_width_);
+  }
+
   size_t dim = 4;
   size_t src_sizes[4], src_strides[4];
   size_t dst_sizes[4], dst_strides[4];
@@ -180,13 +200,20 @@ void MKLPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   bwd_top_diff->name = "bwd_top_diff @ " + this->layer_param_.name();
   bwd_bottom_diff->name = "bwd_bottom_diff @ " + this->layer_param_.name();
 
-  fwd_bottom_data->create_user_layout(dim, src_sizes, src_strides);
-  fwd_top_data   ->create_user_layout(dim, dst_sizes, dst_strides);
-  bwd_bottom_diff->create_user_layout(dim, src_sizes, src_strides);
-  bwd_top_diff   ->create_user_layout(dim, dst_sizes, dst_strides);
+  fwd_bottom_data->create_user_layout(dim, src_sizes, src_strides,false);
+  fwd_top_data   ->create_user_layout(dim, dst_sizes, dst_strides,false);
+  bwd_bottom_diff->create_user_layout(dim, src_sizes, src_strides,false);
+  bwd_top_diff   ->create_user_layout(dim, dst_sizes, dst_strides,false);
   // Primitives will be allocated during the first fwd pass
-  poolingFwd = NULL;
-  poolingBwd = NULL;
+  dnnDelete<Dtype>(poolingFwd);
+  dnnDelete<Dtype>(poolingBwd);
+}
+
+template <typename Dtype>
+void MKLPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top)
+{
+  Init(bottom,top);
 }
 
 template <typename Dtype>
@@ -195,77 +222,17 @@ void MKLPoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
   CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
       << "corresponding to (num, channels, height, width)";
 
-  bool shape_changed = true;
   if (channels_ == bottom[0]->channels() &&
       height_ == bottom[0]->height() &&
       width_ == bottom[0]->width() &&
-      num_ == bottom[0]->num())
-    shape_changed = false;
-
-  channels_ = bottom[0]->channels();
-  height_ = bottom[0]->height();
-  width_ = bottom[0]->width();
-  num_ = bottom[0]->num();
-
-  if (global_pooling_) {
-    kernel_h_ = bottom[0]->height();
-    kernel_w_ = bottom[0]->width();
-  }
-  pooled_height_ = static_cast<int>(ceil(static_cast<float>(
-      height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
-  pooled_width_ = static_cast<int>(ceil(static_cast<float>(
-      width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
-  if (pad_h_ || pad_w_) {
-    // If we have padding, ensure that the last pooling starts strictly
-    // inside the image (instead of at the padding); otherwise clip the last.
-    if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) {
-      --pooled_height_;
-    }
-    if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) {
-      --pooled_width_;
-    }
-    CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
-    CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
-  }
-  top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
-      pooled_width_);
-  if (top.size() > 1) {
-    (reinterpret_cast<Blob<size_t>* > (top[1]) )->Reshape(bottom[0]->num(),
-        channels_, pooled_height_, pooled_width_);
-  }
-  // If max/min/avg pooling, we will initialize the vector index part.
-  if (top.size() == 1) {
-    max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
-        pooled_width_);
-  }
-  // If stochastic pooling, we will initialize the random index part.
-  if (this->layer_param_.pooling_param().pool() ==
-      PoolingParameter_PoolMethod_STOCHASTIC) {
-    rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
-        pooled_width_);
+      num_ == bottom[0]->num()) {
+    return;
   }
 
-  if (shape_changed) {
-    // Recreate MKL layout
-    size_t dim = 4;
-    size_t src_sizes[4], src_strides[4];
-
-    src_sizes[0] = bottom[0]->width();
-    src_sizes[1] = bottom[0]->height();
-    src_sizes[2] = bottom[0]->channels();
-    src_sizes[3] = bottom[0]->num();
+  Init(bottom,top);
 
-    src_strides[0] = 1;
-    src_strides[1] = src_sizes[0];
-    src_strides[2] = src_sizes[0]*src_sizes[1];
-    src_strides[3] = src_sizes[0]*src_sizes[1]*src_sizes[2];
-
-    fwd_bottom_data->create_user_layout(dim, src_sizes, src_strides);
-  }
 }
 
-// TODO(Yangqing): Is there a faster way to do pooling in the channel-first
-// case?
 template <typename Dtype>
 void MKLPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
diff --git a/src/caffe/layers/mkl_relu_layer.cpp b/src/caffe/layers/mkl_relu_layer.cpp
index e967af430..8ec6cb93c 100644
--- a/src/caffe/layers/mkl_relu_layer.cpp
+++ b/src/caffe/layers/mkl_relu_layer.cpp
@@ -49,33 +49,73 @@ MKLReLULayer<Dtype>::~MKLReLULayer() {
 }
 
 template <typename Dtype>
-void MKLReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+void MKLReLULayer<Dtype>::Init(
+    const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-// CHECK_EQ(top[0]->shape(), bottom[0]->shape());
+
   size_t dim = bottom[0]->shape().size();
-  size_t sizes[dim], strides[dim];
+  this->sizes_.resize(dim);
+  this->strides_.resize(dim);
   for (size_t d = 0; d < dim; ++d) {
-    sizes[d] = bottom[0]->shape()[dim - 1 - d];
-    strides[d] = (d == 0) ? 1 : strides[d-1]*sizes[d-1];
+    this->sizes_[d] = bottom[0]->shape()[dim - 1 - d];
+    this->strides_[d] = (d == 0) ? 1 : this->strides_[d-1]*this->sizes_[d-1];
   }
 
   // Names are for debugging only
-  fwd_bottom_data_->name = "fwd_bottom_data @ " + this->layer_param_.name();
-  fwd_top_data_->name = "fwd_top_data @ " + this->layer_param_.name();
-  bwd_bottom_diff_->name = "bwd_bottom_diff @ " + this->layer_param_.name();
-  bwd_top_diff_->name = "bwd_top_diff @ " + this->layer_param_.name();
+  this->fwd_bottom_data_->name = "fwd_bottom_data @ " + this->layer_param_.name();
+  this->fwd_top_data_->name = "fwd_top_data @ " + this->layer_param_.name();
+  this->bwd_bottom_diff_->name = "bwd_bottom_diff @ " + this->layer_param_.name();
+  this->bwd_top_diff_->name = "bwd_top_diff @ " + this->layer_param_.name();
 
-  fwd_bottom_data_->create_user_layout(dim, sizes, strides);
-  fwd_top_data_   ->create_user_layout(dim, sizes, strides);
-  bwd_bottom_diff_->create_user_layout(dim, sizes, strides);
-  bwd_top_diff_   ->create_user_layout(dim, sizes, strides);
+  this->fwd_bottom_data_->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]),false);
+  this->fwd_top_data_   ->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]),false);
+  this->bwd_bottom_diff_->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]),false);
+  this->bwd_top_diff_   ->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]),false);
 
   // "Lazy" allocation because here we don't know
   // what layout is used by neighbours.
-  reluFwd_ = NULL;  // Will be allocated in a "lazy" way in first forward pass
-  reluBwd_ = NULL;  // Will be allocated in a "lazy" way in first backward pass
+  dnnDelete<Dtype>(reluFwd_);  // Will be allocated in a "lazy" way in first forward pass
+  dnnDelete<Dtype>(reluBwd_);  // Will be allocated in a "lazy" way in first backward pass
+}
+
+template <typename Dtype>
+void MKLReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+// CHECK_EQ(top[0]->shape(), bottom[0]->shape());
+  Init(bottom,top);
 }
 
+template <typename Dtype>
+void MKLReLULayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  NeuronLayer<Dtype>::Reshape(bottom,top);
+
+  // Here I check for sizes whether to destroy primitives
+  size_t dim = bottom[0]->shape().size();
+
+  // If dimensions of blobs are the same as they were then
+  // do not really destroy primitives
+  if (dim == this->sizes_.size()) {
+    //.. check for strides and size dims if they correspond to each other
+
+    // TODO: speedup comparison?
+    bool is_match = true;
+    for (size_t d = 0; d < dim; ++d) {
+      is_match = is_match && (this->sizes_[d] == bottom[0]->shape()[dim - 1 - d]);
+      is_match = is_match && (this->strides_[d] == ((d == 0) ? 1 : this->strides_[d-1]*this->sizes_[d-1]));
+    }
+
+    // If no new modification was done to layout sizes,
+    // strides relative to previous iteration then no primitives recreation is needed
+    if (is_match) {
+      return;
+    }
+  }
+
+  Init(bottom,top);
+}
+
+
 template <typename Dtype>
 void MKLReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
diff --git a/src/caffe/layers/mkl_split_layer.cpp b/src/caffe/layers/mkl_split_layer.cpp
index e5307a982..28197fb7e 100644
--- a/src/caffe/layers/mkl_split_layer.cpp
+++ b/src/caffe/layers/mkl_split_layer.cpp
@@ -49,26 +49,36 @@ MKLSplitLayer<Dtype>::~MKLSplitLayer() {
 }
 
 template <typename Dtype>
-void MKLSplitLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+void MKLSplitLayer<Dtype>::Init(
+    const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   num_tops = top.size();
   size_t dim_src = bottom[0]->shape().size();
-
-  size_t sizes_src[dim_src], strides_src[dim_src];
+  this->sizes_src_.resize(dim_src);
+  this->strides_src_.resize(dim_src);
   for (size_t d = 0; d < dim_src; ++d) {
-    sizes_src[d] = bottom[0]->shape()[dim_src - d - 1];
-    strides_src[d] = (d == 0) ? 1 : strides_src[d-1]*sizes_src[d-1];
+    this->sizes_src_[d] = bottom[0]->shape()[dim_src - d - 1];
+    this->strides_src_[d] = (d == 0) ? 1 : this->strides_src_[d-1]*this->sizes_src_[d-1];
   }
 
   for (size_t i = 0; i < num_tops; ++i) {
     bwd_top_diff.push_back(shared_ptr<MKLDiff<Dtype> >(new MKLDiff<Dtype>));
-    bwd_top_diff[i]->create_user_layout(dim_src, sizes_src, strides_src);
+    bwd_top_diff[i]->create_user_layout(dim_src, &(this->sizes_src_[0]), &(this->strides_src_[0]),false);
   }
 
   // Blob-wise coefficients for the elementwise operation.
   coeffs_ = vector<Dtype>(top.size(), 1);
 
-  bwd_bottom_diff->create_user_layout(dim_src, sizes_src, strides_src);
+  bwd_bottom_diff->create_user_layout(dim_src, &(this->sizes_src_[0]), &(this->strides_src_[0]),false);
+
+  // Primitive will be created at first time it is to be used
+  dnnDelete<Dtype>(sumPrimitive);
+}
+
+template <typename Dtype>
+void MKLSplitLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  Init(bottom,top);
 }
 
 template <typename Dtype>
@@ -86,6 +96,32 @@ void MKLSplitLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     top[i]->ReshapeLike(*bottom[0]);
     CHECK_EQ(count_, top[i]->count());
   }
+
+  // Here we check
+  // Here I check for sizes whether to destroy primitives
+  size_t dim_src = bottom[0]->shape().size();
+
+  // If dimensions of blobs are the same as they were then
+  // do not really destroy primitives
+  if (dim_src == this->sizes_src_.size()) {
+    //.. check for strides and size dims if they correspond to each other
+
+    // TODO: speedup comparison?
+    bool is_match = true;
+    for (size_t d = 0; d < dim_src; ++d) {
+      is_match = is_match && (this->sizes_src_[d] == bottom[0]->shape()[dim_src - 1 - d]);
+      is_match = is_match && (this->strides_src_[d] == ((d == 0) ? 1 :
+               this->strides_src_[d-1]*this->sizes_src_[d-1]));
+    }
+
+    // If no new modification was done to layout sizes,
+    // strides relative to previous iteration then no primitives recreation is needed
+    if (is_match) {
+      return;
+    }
+  }
+
+  Init(bottom,top);
 }
 
 template <typename Dtype>
diff --git a/src/caffe/mkl_memory.cpp b/src/caffe/mkl_memory.cpp
index 08727b367..1d17fe760 100644
--- a/src/caffe/mkl_memory.cpp
+++ b/src/caffe/mkl_memory.cpp
@@ -49,18 +49,7 @@ namespace caffe {
 template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::create_conversions() {
   int status;
-  if (this->convert_from_int) {
-    DLOG(INFO) << "convert_from_int layout already created, recreating for"
-               << this->name;
-    status = dnnDelete<Dtype>(this->convert_from_int);
-    CHECK_EQ(status, E_SUCCESS);
-  }
-  if (this->convert_to_int) {
-    DLOG(INFO) << "convert_to_int layout already created, recreating for"
-               << this->name;
-    status = dnnDelete<Dtype>(this->convert_to_int);
-    CHECK_EQ(status, E_SUCCESS);
-  }
+  this->remove_conversions();
 
   if (layout_int && !dnnLayoutCompare<Dtype>(layout_usr, layout_int)) {
     CHECK(layout_usr);
@@ -77,6 +66,23 @@ void MKLMemoryDescriptorBase<Dtype>::create_conversions() {
   }
 }
 
+template <typename Dtype>
+void MKLMemoryDescriptorBase<Dtype>::remove_conversions() {
+  int status;
+  if (this->convert_from_int) {
+    DLOG(INFO) << "convert_from_int layout already created, recreating for"
+               << this->name;
+    status = dnnDelete<Dtype>(this->convert_from_int);
+    CHECK_EQ(status, E_SUCCESS);
+  }
+  if (this->convert_to_int) {
+    DLOG(INFO) << "convert_to_int layout already created, recreating for"
+               << this->name;
+    status = dnnDelete<Dtype>(this->convert_to_int);
+    CHECK_EQ(status, E_SUCCESS);
+  }
+}
+
 template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::create_internal_layout(
     const dnnPrimitive_t primitive, dnnResourceType_t type) {
@@ -86,6 +92,12 @@ void MKLMemoryDescriptorBase<Dtype>::create_internal_layout(
                << this->name;
     status = dnnLayoutDelete<Dtype>(this->layout_int);
     CHECK_EQ(status, E_SUCCESS);
+
+    // with layout invalidated we should also remove Allocation
+    // as next layout may declare different sizes of allocation
+    status = dnnReleaseBuffer<Dtype>(this->internal_ptr);
+    this->internal_ptr = NULL;
+    CHECK_EQ(status, E_SUCCESS);
   }
   status = dnnLayoutCreateFromPrimitive<Dtype>(
       &this->layout_int, primitive, type);
@@ -99,7 +111,10 @@ template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::create_user_layout(
-    size_t dimension, const size_t size[], const size_t strides[]) {
+    size_t dimension,
+    const size_t size[],
+    const size_t strides[],
+    bool create_conversion_if_possible) {
   int status;
   if (this->layout_usr) {
     DLOG(INFO) << "User layout already created, recreating for"
@@ -113,8 +128,18 @@ void MKLMemoryDescriptorBase<Dtype>::create_user_layout(
   CHECK_EQ(status, E_SUCCESS) << "Failed dnnLayoutCreate with status "
       << status << " for buffer: " << this->name << "\n";
 
-  if (this->layout_int)
-    this->create_conversions();
+  // If conversion creation is to happen
+  // then only if we have internal layout already in place
+  // we can proceed with conversion creation.
+  // Otherwise we make sure that existing conversions are deleted
+  // as with new layout creation they are being instantly invalidated
+  if (create_conversion_if_possible) {
+    if (this->layout_int) {
+      this->create_conversions();
+    }
+  } else {
+    this->remove_conversions();
+  }
 }
 
 template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::create_layouts(
     const dnnPrimitive_t primitive, dnnResourceType_t type,
     size_t dimension, const size_t size[], const size_t strides[]) {
@@ -137,6 +162,11 @@ void MKLMemoryDescriptorBase<Dtype>::create_layouts(
 template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::convert_from_prv(void* cpu_ptr) {
   CHECK(cpu_ptr);
+  // When no conversion is available then
+  // recreate them if layouts are available
+  if (this-> convert_from_int == NULL) {
+    this->create_conversions();
+  }
   CHECK(this->convert_from_int);
   int status;
   void *convert_resources[dnnResourceNumber];

From 44fcd26340c9e6f9a87608791e480569b0f53c1d Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Fri, 14 Oct 2016 19:01:46 +0200
Subject: [PATCH 02/12] - Lint fixes

---
 src/caffe/layers/mkl_relu_layer.cpp | 9 ++++-----
 src/caffe/mkl_memory.cpp            | 8 ++++----
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/caffe/layers/mkl_relu_layer.cpp b/src/caffe/layers/mkl_relu_layer.cpp
index 8ec6cb93c..fd4120f0b 100644
--- a/src/caffe/layers/mkl_relu_layer.cpp
+++ b/src/caffe/layers/mkl_relu_layer.cpp
@@ -52,7 +52,6 @@ template <typename Dtype>
 void MKLReLULayer<Dtype>::Init(
     const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-
   size_t dim = bottom[0]->shape().size();
   this->sizes_.resize(dim);
   this->strides_.resize(dim);
@@ -67,10 +66,10 @@ void MKLReLULayer<Dtype>::Init(
   this->bwd_bottom_diff_->name = "bwd_bottom_diff @ " + this->layer_param_.name();
   this->bwd_top_diff_->name = "bwd_top_diff @ " + this->layer_param_.name();
 
-  this->fwd_bottom_data_->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]),false);
-  this->fwd_top_data_   ->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]),false);
-  this->bwd_bottom_diff_->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]),false);
-  this->bwd_top_diff_   ->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]),false);
+  this->fwd_bottom_data_->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]), false);
+  this->fwd_top_data_   ->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]), false);
+  this->bwd_bottom_diff_->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]), false);
+  this->bwd_top_diff_   ->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]), false);
 
   // "Lazy" allocation because here we don't know
   // what layout is used by neighbours.
diff --git a/src/caffe/mkl_memory.cpp b/src/caffe/mkl_memory.cpp
index 1d17fe760..7d713464a 100644
--- a/src/caffe/mkl_memory.cpp
+++ b/src/caffe/mkl_memory.cpp
@@ -130,7 +130,7 @@ void MKLMemoryDescriptorBase<Dtype>::create_user_layout(
   // If conversion creation is to happen
   // then only if we have internal layout already in place
-  // we can proceed with conversion creation. 
+  // we can proceed with conversion creation.
   // Otherwise we make sure that existing conversions are deleted
   // as with new layout creation they are being instantly invalidated
   if (create_conversion_if_possible) {
@@ -146,7 +146,7 @@ template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::create_layouts(
     const dnnPrimitive_t primitive, dnnResourceType_t type,
     size_t dimension, const size_t size[], const size_t strides[]) {
-  // To avoid creating conversion among potentially different 
+  // To avoid creating conversion among potentially different
   // (in terms of size) layouts we need to destroy existing layouts here
   if (this->layout_usr) {
@@ -162,10 +162,10 @@ template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::convert_from_prv(void* cpu_ptr) {
   CHECK(cpu_ptr);
-  // When no conversion is available then 
+  // When no conversion is available then
   // recreate them if layouts are available
   if (this-> convert_from_int == NULL) {
-    this->create_conversions(); 
+    this->create_conversions();
   }
   CHECK(this->convert_from_int);
   int status;

From 24a8d62717b2ae4b36b6b01fded44a067dde0fde Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Tue, 18 Oct 2016 13:55:46 +0200
Subject: [PATCH 03/12] - fix to memory leak in MKL conv layer

---
 include/caffe/mkl_memory.hpp               |  3 ++
 src/caffe/layers/mkl_concat_layer.cpp      |  4 +-
 src/caffe/layers/mkl_convolution_layer.cpp |  1 +
 src/caffe/mkl_memory.cpp                   | 59 +++++++++++++-------
 4 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/include/caffe/mkl_memory.hpp b/include/caffe/mkl_memory.hpp
index 75739b3eb..52538b84c 100644
--- a/include/caffe/mkl_memory.hpp
+++ b/include/caffe/mkl_memory.hpp
@@ -102,6 +102,9 @@ struct MKLMemoryDescriptorBase : PrvMemDescr,
     const dnnPrimitive_t primitive, dnnResourceType_t type,
     size_t dimension, const size_t size[], const size_t strides[]);
 
+  void remove_internal_layout();
+  void remove_user_layout();
+
   virtual PrvDescrType get_descr_type() {return PRV_DESCR_MKL2017;}
   virtual size_t prv_size() {
       return dnnLayoutGetMemorySize<Dtype>(layout_int);
   }
diff --git a/src/caffe/layers/mkl_concat_layer.cpp b/src/caffe/layers/mkl_concat_layer.cpp
index 59238cd39..46cafad3e 100644
--- a/src/caffe/layers/mkl_concat_layer.cpp
+++ b/src/caffe/layers/mkl_concat_layer.cpp
@@ -97,8 +97,8 @@ void MKLConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   bwd_top_diff_->create_user_layout(dim_dst, sizes_dst, strides_dst);
   fwd_top_data_->create_user_layout(dim_dst, sizes_dst, strides_dst);
 
-  concatFwd_ = NULL;
-  concatBwd_ = NULL;
+  dnnDelete<Dtype>(concatFwd_);
+  dnnDelete<Dtype>(concatBwd_);
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/mkl_convolution_layer.cpp b/src/caffe/layers/mkl_convolution_layer.cpp
index 18068d852..3f76539da 100644
--- a/src/caffe/layers/mkl_convolution_layer.cpp
+++ b/src/caffe/layers/mkl_convolution_layer.cpp
@@ -284,6 +284,7 @@ void MKLConvolutionLayer<Dtype>::Init(
   // layout_usr = internal layout of weight data on forward convolution
   bwdf2fwd_filter_diff->create_internal_layout(convolutionBwdFilter,
       dnnResourceDiffFilter);
+  bwdf2fwd_filter_diff->remove_user_layout();
   status = dnnLayoutCreateFromPrimitive<Dtype>(
       &bwdf2fwd_filter_diff->layout_usr, convolutionFwd, dnnResourceFilter);
   CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status "
diff --git a/src/caffe/mkl_memory.cpp b/src/caffe/mkl_memory.cpp
index 7d713464a..2ae21217c 100644
--- a/src/caffe/mkl_memory.cpp
+++ b/src/caffe/mkl_memory.cpp
@@ -87,18 +87,7 @@ template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::create_internal_layout(
     const dnnPrimitive_t primitive, dnnResourceType_t type) {
   int status;
-  if (this->layout_int) {
-    DLOG(INFO) << "Internal layout already created, recreating for"
-               << this->name;
-    status = dnnLayoutDelete<Dtype>(this->layout_int);
-    CHECK_EQ(status, E_SUCCESS);
-
-    // with layout invalidated we should also remove Allocation
-    // as next layout may declare different sizes of allocation
-    status = dnnReleaseBuffer<Dtype>(this->internal_ptr);
-    this->internal_ptr = NULL;
-    CHECK_EQ(status, E_SUCCESS);
-  }
+  this->remove_internal_layout();
   status = dnnLayoutCreateFromPrimitive<Dtype>(
       &this->layout_int, primitive, type);
   CHECK_EQ(status, E_SUCCESS)
@@ -116,13 +105,7 @@ void MKLMemoryDescriptorBase<Dtype>::create_user_layout(
     const size_t strides[],
     bool create_conversion_if_possible) {
   int status;
-  if (this->layout_usr) {
-    DLOG(INFO) << "User layout already created, recreating for"
-               << this->name;
-    status = dnnLayoutDelete<Dtype>(this->layout_usr);
-    CHECK_EQ(status, E_SUCCESS);
-  }
-
+  this->remove_user_layout();
   status = dnnLayoutCreate<Dtype>(
       &this->layout_usr, dimension, size, strides);
   CHECK_EQ(status, E_SUCCESS) << "Failed dnnLayoutCreate with status "
@@ -142,6 +125,44 @@ void MKLMemoryDescriptorBase<Dtype>::create_user_layout(
   }
 }
 
+template <typename Dtype>
+void MKLMemoryDescriptorBase<Dtype>::remove_internal_layout() {
+  int status;
+  if (this->layout_int) {
+    DLOG(INFO) << "Internal layout already created, recreating for"
+               << this->name;
+    status = dnnLayoutDelete<Dtype>(this->layout_int);
+    CHECK_EQ(status, E_SUCCESS);
+
+    // with layout invalidated we should also remove Allocation
+    // as next layout may declare different sizes of allocation
+    if(this->internal_ptr) {
+      status = dnnReleaseBuffer<Dtype>(this->internal_ptr);
+      this->internal_ptr = NULL;
+      CHECK_EQ(status, E_SUCCESS);
+    }
+  }
+}
+
+template <typename Dtype>
+void MKLMemoryDescriptorBase<Dtype>::remove_user_layout() {
+  int status;
+  if (this->layout_usr) {
+    DLOG(INFO) << "User layout already created, recreating for"
+               << this->name;
+    status = dnnLayoutDelete<Dtype>(this->layout_usr);
+    CHECK_EQ(status, E_SUCCESS);
+
+    // with layout invalidated we should also remove Allocation
+    // as next layout may declare different sizes of allocation
+    if(this->internal_ptr) {
+      status = dnnReleaseBuffer<Dtype>(this->internal_ptr);
+      this->internal_ptr = NULL;
+      CHECK_EQ(status, E_SUCCESS);
+    }
+  }
+}
+
 template <typename Dtype>
 void MKLMemoryDescriptorBase<Dtype>::create_layouts(
     const dnnPrimitive_t primitive, dnnResourceType_t type,

From 248b530e50e71b96f40eb7020220f861cfc7a336 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Tue, 18 Oct 2016 15:16:11 +0200
Subject: [PATCH 04/12] - Candidate fix to Reshaping of MKL Concat and LRN
 layers

---
 include/caffe/layers/mkl_layers.hpp   |  5 ++++
 src/caffe/layers/mkl_concat_layer.cpp | 27 ++++++++++++++++-----
 src/caffe/layers/mkl_lrn_layer.cpp    | 35 +++++++++++++++++++++++----
 3 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/include/caffe/layers/mkl_layers.hpp b/include/caffe/layers/mkl_layers.hpp
index c75f48951..fdd92f59a 100644
--- a/include/caffe/layers/mkl_layers.hpp
+++ b/include/caffe/layers/mkl_layers.hpp
@@ -169,6 +169,9 @@ class MKLLRNLayer : public Layer<Dtype> {
   virtual void CrossChannelBackward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 
+  void Init( const vector<Blob<Dtype>*>& bottom,
+             const vector<Blob<Dtype>*>& top);
+
   int size_;
   int pre_pad_;
   Dtype alpha_;
@@ -334,6 +337,8 @@ class MKLConcatLayer : public Layer<Dtype> {
       const vector<bool>& propagate_down,
       const vector<Blob<Dtype>*>& bottom);
 
+  void Init( const vector<Blob<Dtype>*>& bottom,
+             const vector<Blob<Dtype>*>& top);
 private:
   dnnPrimitive_t concatFwd_;
   dnnPrimitive_t concatBwd_;
diff --git a/src/caffe/layers/mkl_concat_layer.cpp b/src/caffe/layers/mkl_concat_layer.cpp
index 46cafad3e..787841879 100644
--- a/src/caffe/layers/mkl_concat_layer.cpp
+++ b/src/caffe/layers/mkl_concat_layer.cpp
@@ -49,8 +49,9 @@ template <typename Dtype> MKLConcatLayer<Dtype>::~MKLConcatLayer() {
 }
 
 template <typename Dtype>
-void MKLConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+void MKLConcatLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
+
   size_t dim_src = bottom[0]->shape().size();
   size_t dim_dst = dim_src;
 
@@ -81,8 +82,8 @@ void MKLConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     split_channels_[i] = bottom[i]->channels();
     channels_ += split_channels_[i];
 
-    fwd_bottom_data_[i]->create_user_layout(dim_src, sizes_src, strides_src);
-    bwd_bottom_diff_[i]->create_user_layout(dim_src, sizes_src, strides_src);
+    fwd_bottom_data_[i]->create_user_layout(dim_src, sizes_src, strides_src, false);
+    bwd_bottom_diff_[i]->create_user_layout(dim_src, sizes_src, strides_src, false);
   }
 
   // XXX: almost the same computations as above for src
@@ -94,21 +95,35 @@ void MKLConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     sizes_dst[d] = bottom[0]->shape()[dim_dst - 1 - d];
     strides_dst[d] = (d == 0) ? 1 : strides_dst[d - 1] * sizes_dst[d - 1];
   }
-  bwd_top_diff_->create_user_layout(dim_dst, sizes_dst, strides_dst);
-  fwd_top_data_->create_user_layout(dim_dst, sizes_dst, strides_dst);
+  bwd_top_diff_->create_user_layout(dim_dst, sizes_dst, strides_dst, false);
+  fwd_top_data_->create_user_layout(dim_dst, sizes_dst, strides_dst, false);
 
   dnnDelete<Dtype>(concatFwd_);
   dnnDelete<Dtype>(concatBwd_);
 }
 
+template <typename Dtype>
+void MKLConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  Init(bottom,top);
+}
+
 template <typename Dtype>
 void MKLConcatLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
+
+  if((num_ == bottom[0]->num()) &&
+      height_ == bottom[0]->height() &&
+      width_ == bottom[0]->width()) {
+    top[0]->Reshape(num_, channels_, height_, width_);
+    return;
+  }
+
   num_ = bottom[0]->num();
   height_ = bottom[0]->height();
   width_ = bottom[0]->width();
 
-  top[0]->Reshape(num_, channels_, height_, width_);
+  Init(bottom,top);
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/mkl_lrn_layer.cpp b/src/caffe/layers/mkl_lrn_layer.cpp
index 36ae14948..a4e187452 100644
--- a/src/caffe/layers/mkl_lrn_layer.cpp
+++ b/src/caffe/layers/mkl_lrn_layer.cpp
@@ -52,7 +52,7 @@ MKLLRNLayer<Dtype>::~MKLLRNLayer() {
 }
 
 template <typename Dtype>
-void MKLLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+void MKLLRNLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   size_ = this->layer_param_.lrn_param().local_size();
   CHECK_EQ(size_ % 2, 1) << "LRN only supports odd values for local_size";
@@ -83,14 +83,26 @@ void MKLLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   bwd_top_diff->name = "bwd_top_diff @ " + this->layer_param_.name();
   bwd_bottom_diff->name = "bwd_bottom_diff @ " + this->layer_param_.name();
 
-  fwd_bottom_data->create_user_layout(dim, sizes, strides);
-  fwd_top_data   ->create_user_layout(dim, sizes, strides);
-  bwd_bottom_diff->create_user_layout(dim, sizes, strides);
-  bwd_top_diff   ->create_user_layout(dim, sizes, strides);
+  fwd_bottom_data->create_user_layout(dim, sizes, strides, false);
+  fwd_top_data   ->create_user_layout(dim, sizes, strides, false);
+  bwd_bottom_diff->create_user_layout(dim, sizes, strides, false);
+  bwd_top_diff   ->create_user_layout(dim, sizes, strides, false);
 
   // Fwd, Bwd primitives and lrn_buffer_ are allocated in "Lazy"
   // mode, because here we don't know
   // what layout is used by neighbours.
+  dnnDelete<Dtype>(lrnFwd);
+  dnnDelete<Dtype>(lrnBwd);
+  if(lrn_buffer_) {
+    dnnReleaseBuffer<Dtype>(lrn_buffer_);
+    lrn_buffer_ = NULL;
+  }
+}
+
+template <typename Dtype>
+void MKLLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  Init(bottom,top);
 }
 
 template <typename Dtype>
@@ -98,6 +110,15 @@ void MKLLRNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
       << "corresponding to (num, channels, height, width)";
+
+  bool reshaping = true;
+  if((num_ == bottom[0]->num()) &&
+      channels_ == bottom[0]->channels() &&
+      height_ == bottom[0]->height() &&
+      width_ == bottom[0]->width()) {
+    reshaping = false;
+  }
+
   channels_ = bottom[0]->channels();
   height_ = bottom[0]->height();
   width_ = bottom[0]->width();
@@ -112,6 +133,10 @@ void MKLLRNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     default:
       LOG(FATAL) << "Unknown normalization region.";
   }
+
+  if (reshaping == true) {
+    Init(bottom,top);
+  }
 }
 
 template <typename Dtype>

From 2ef9e882ed4bf80c3bc17d65b876f11056bbdf29 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Tue, 18 Oct 2016 16:16:23 +0200
Subject: [PATCH 05/12] - cosmetic fixes and Reshape fix to MKLBatchNorm

---
 include/caffe/layers/mkl_layers.hpp       | 32 ++++++++------
 include/mkl_dnn_cppwrapper.h              | 40 +++++++++++++------
 src/caffe/layers/mkl_batch_norm_layer.cpp | 47 ++++++++++++++-------
 src/caffe/layers/mkl_lrn_layer.cpp        |  6 +--
 src/caffe/layers/mkl_relu_layer.cpp       | 30 +++++++++------
 src/caffe/mkl_memory.cpp                  | 16 +++-----
 6 files changed, 108 insertions(+), 63 deletions(-)

diff --git a/include/caffe/layers/mkl_layers.hpp b/include/caffe/layers/mkl_layers.hpp
index fdd92f59a..52c22c3f8 100644
--- a/include/caffe/layers/mkl_layers.hpp
+++ b/include/caffe/layers/mkl_layers.hpp
@@ -73,8 +73,8 @@ class MKLConvolutionLayer : public ConvolutionLayer<Dtype> {
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 
   // Customized methods
-  void Init( const vector<Blob<Dtype>*>& bottom,
-             const vector<Blob<Dtype>*>& top);
+  void Init(const vector<Blob<Dtype>*>& bottom,
+            const vector<Blob<Dtype>*>& top);
 
   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
@@ -169,8 +169,8 @@ class MKLLRNLayer : public Layer<Dtype> {
   virtual void CrossChannelBackward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 
-  void Init( const vector<Blob<Dtype>*>& bottom,
-             const vector<Blob<Dtype>*>& top);
+  void Init(const vector<Blob<Dtype>*>& bottom,
+            const vector<Blob<Dtype>*>& top);
 
   int size_;
   int pre_pad_;
@@ -208,8 +208,8 @@ class MKLPoolingLayer : public Layer<Dtype> {
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
 
-  void Init( const vector<Blob<Dtype>*>& bottom,
-             const vector<Blob<Dtype>*>& top);
+  void Init(const vector<Blob<Dtype>*>& bottom,
+            const vector<Blob<Dtype>*>& top);
 
   virtual inline const char* type() const { return "Pooling"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
@@ -279,8 +279,8 @@ class MKLReLULayer : public NeuronLayer<Dtype> {
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
 
-  void Init( const vector<Blob<Dtype>*>& bottom,
-             const vector<Blob<Dtype>*>& top);
+  void Init(const vector<Blob<Dtype>*>& bottom,
+            const vector<Blob<Dtype>*>& top);
 
   virtual inline const char* type() const { return "ReLU"; }
 
@@ -337,8 +337,9 @@ class MKLConcatLayer : public Layer<Dtype> {
       const vector<bool>& propagate_down,
       const vector<Blob<Dtype>*>& bottom);
 
-  void Init( const vector<Blob<Dtype>*>& bottom,
-             const vector<Blob<Dtype>*>& top);
+  void Init(const vector<Blob<Dtype>*>& bottom,
+            const vector<Blob<Dtype>*>& top);
+
 private:
   dnnPrimitive_t concatFwd_;
   dnnPrimitive_t concatBwd_;
@@ -391,6 +392,9 @@ class MKLBatchNormLayer : public Layer<Dtype> {
   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 
+  void Init(const vector<Blob<Dtype>*>& bottom,
+            const vector<Blob<Dtype>*>& top);
+
 //  Dtype moving_average_fraction_;
   Dtype eps_;
   bool use_weight_bias_;
@@ -426,8 +430,8 @@ class MKLSplitLayer : public Layer<Dtype> {
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
 
-  void Init( const vector<Blob<Dtype>*>& bottom,
-             const vector<Blob<Dtype>*>& top);
+  void Init(const vector<Blob<Dtype>*>& bottom,
+            const vector<Blob<Dtype>*>& top);
 
   virtual inline const char* type() const { return "Split"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
@@ -467,8 +471,8 @@ class MKLEltwiseLayer : public Layer<Dtype> {
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
 
-  void Init( const vector<Blob<Dtype>*>& bottom,
-             const vector<Blob<Dtype>*>& top);
+  void Init(const vector<Blob<Dtype>*>& bottom,
+            const vector<Blob<Dtype>*>& top);
 
   virtual inline const char* type() const { return "Eltwise"; }
   virtual inline int MinBottomBlobs() const { return 2; }
diff --git a/include/mkl_dnn_cppwrapper.h b/include/mkl_dnn_cppwrapper.h
index 36be959e4..9608dff33 100644
--- a/include/mkl_dnn_cppwrapper.h
+++ b/include/mkl_dnn_cppwrapper.h
@@ -104,12 +104,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   void* ptr);
 SPEC_PREFIX dnnError_t dnnReleaseBuffer<float>(
   void* ptr) {
-    dnnError_t status = dnnReleaseBuffer_F32(ptr);
+    dnnError_t status = E_SUCCESS;
+    if( ptr != NULL) {
+      status = dnnReleaseBuffer_F32(ptr);
+    }
     return status;
   }
 SPEC_PREFIX dnnError_t dnnReleaseBuffer<double>(
   void* ptr) {
-    dnnError_t status = dnnReleaseBuffer_F64(ptr);
+    dnnError_t status = E_SUCCESS;
+    if( ptr != NULL) {
+      status = dnnReleaseBuffer_F64(ptr);
+    }
     return status;
   }
 
@@ -117,16 +123,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   dnnLayout_t& layout);
 SPEC_PREFIX dnnError_t dnnLayoutDelete<float>(
   dnnLayout_t& layout) {
-  dnnError_t status = dnnLayoutDelete_F32(layout);
-  layout = NULL;
+  dnnError_t status = E_SUCCESS;
+  if( layout != NULL) {
+    status = dnnLayoutDelete_F32(layout);
+    layout = NULL;
+  }
   return status;
 }
 SPEC_PREFIX dnnError_t dnnLayoutDelete<double>(
   dnnLayout_t& layout) {
-  dnnError_t status = dnnLayoutDelete_F64(layout);
-  layout = NULL;
+  dnnError_t status = E_SUCCESS;
+  if( layout != NULL) {
+    status = dnnLayoutDelete_F64(layout);
+    layout = NULL;
+  }
   return status;
-}
+  }
 
 TEMPLATE_PREFIX dnnError_t dnnPrimitiveAttributesCreate(
   dnnPrimitiveAttributes_t *attributes);
@@ -190,14 +202,20 @@ TEMPLATE_PREFIX dnnError_t dnnDelete(
   dnnPrimitive_t& primitive);
 SPEC_PREFIX dnnError_t dnnDelete<float>(
   dnnPrimitive_t& primitive) {
-  dnnError_t status = dnnDelete_F32(primitive);
-  primitive = NULL;
+  dnnError_t status = E_SUCCESS;
+  if (primitive != NULL) {
+    status = dnnDelete_F32(primitive);
+    primitive = NULL;
+  }
   return status;
 }
 SPEC_PREFIX dnnError_t dnnDelete<double>(
   dnnPrimitive_t& primitive) {
-  dnnError_t status = dnnDelete_F64(primitive);
-  primitive = NULL;
+  dnnError_t status = E_SUCCESS;
+  if (primitive != NULL) {
+    status = dnnDelete_F64(primitive);
+    primitive = NULL;
+  }
   return status;
 }
diff --git a/src/caffe/layers/mkl_batch_norm_layer.cpp b/src/caffe/layers/mkl_batch_norm_layer.cpp
index ed5d10141..f0a6a8801 100644
--- a/src/caffe/layers/mkl_batch_norm_layer.cpp
+++ b/src/caffe/layers/mkl_batch_norm_layer.cpp
@@ -47,9 +47,9 @@ namespace caffe {
 
 template <typename Dtype>
 MKLBatchNormLayer<Dtype>::~MKLBatchNormLayer() {
-  if (batchNormFwd != NULL) dnnDelete<Dtype>(batchNormFwd);
-  if (batchNormBwdData != NULL) dnnDelete<Dtype>(batchNormBwdData);
-  if (batchNormBwdScaleShift != NULL) dnnDelete<Dtype>(batchNormBwdScaleShift);
+  dnnDelete<Dtype>(batchNormFwd);
+  dnnDelete<Dtype>(batchNormBwdData);
+  dnnDelete<Dtype>(batchNormBwdScaleShift);
 
   dnnLayoutDelete<Dtype>(layout_usr_);
   dnnReleaseBuffer<Dtype>(workspace_buffer_);
 }
 
 template <typename Dtype>
-void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+void MKLBatchNormLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   eps_ = this->layer_param_.batch_norm_param().eps();
   use_weight_bias_ = this->layer_param_.batch_norm_param().use_weight_bias();
@@ -93,24 +93,27 @@ void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   bwd_bottom_diff->name = "bwd_bottom_diff @ " + this->layer_param_.name();
   bwd_top_diff->name = "bwd_top_diff @ " + this->layer_param_.name();
 
+  // TODO: Make a cleanup routine to avoid copy of following code in the Destructor
+
   dnnError_t e;
+  dnnLayoutDelete<Dtype>(layout_usr_);
   e = dnnLayoutCreate<Dtype>(&layout_usr_, dim, sizes, strides);
   CHECK_EQ(e, E_SUCCESS);
 
-  fwd_bottom_data->create_user_layout(dim, sizes, strides);
-  fwd_top_data   ->create_user_layout(dim, sizes, strides);
-  bwd_bottom_diff->create_user_layout(dim, sizes, strides);
-  bwd_top_diff   ->create_user_layout(dim, sizes, strides);
+  fwd_bottom_data->create_user_layout(dim, sizes, strides, false);
+  fwd_top_data   ->create_user_layout(dim, sizes, strides, false);
+  bwd_bottom_diff->create_user_layout(dim, sizes, strides, false);
+  bwd_top_diff   ->create_user_layout(dim, sizes, strides, false);
 
-  workspace_buffer_ = NULL;
-  scaleShift_buffer_ = NULL;
+  dnnReleaseBuffer<Dtype>(workspace_buffer_);
+  dnnReleaseBuffer<Dtype>(scaleShift_buffer_);
   // "Lazy" allocation because here we don't know
   // what layout is used by neighbours.
   // Primitives will be allocated during the first fwd pass
-  batchNormFwd = NULL;
-  batchNormBwdData = NULL;
-  batchNormBwdScaleShift = NULL;
+  dnnDelete<Dtype>(batchNormFwd);
+  dnnDelete<Dtype>(batchNormBwdData);
+  dnnDelete<Dtype>(batchNormBwdScaleShift);
 
   if (use_weight_bias_) {
     if ( bias_term_ ) {
@@ -147,10 +150,24 @@ void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   }
 }
 
+template <typename Dtype>
+void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  Init(bottom,top);
+}
 
 template <typename Dtype>
 void MKLBatchNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
+
+  bool reshaping = true;
+  if((num_ == bottom[0]->num()) &&
+      channels_ == bottom[0]->channels() &&
+      height_ == bottom[0]->height() &&
+      width_ == bottom[0]->width()) {
+    reshaping = false;
+  }
+
   if (bottom[0] == top[0]) {  // in-place computation
     temp_.ReshapeLike(*bottom[0]);
   } else {
@@ -160,6 +177,10 @@ void MKLBatchNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     num_ = bottom[0]->num();
     top[0]->Reshape(num_, channels_, height_, width_);
   }
+
+  if (reshaping == true) {
+    Init(bottom,top);
+  }
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/mkl_lrn_layer.cpp b/src/caffe/layers/mkl_lrn_layer.cpp
index a4e187452..002f48a80 100644
--- a/src/caffe/layers/mkl_lrn_layer.cpp
+++ b/src/caffe/layers/mkl_lrn_layer.cpp
@@ -93,10 +93,8 @@ void MKLLRNLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
   // what layout is used by neighbours.
   dnnDelete<Dtype>(lrnFwd);
   dnnDelete<Dtype>(lrnBwd);
-  if(lrn_buffer_) {
-    dnnReleaseBuffer<Dtype>(lrn_buffer_);
-    lrn_buffer_ = NULL;
-  }
+  dnnReleaseBuffer<Dtype>(lrn_buffer_);
+  lrn_buffer_ = NULL;
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/mkl_relu_layer.cpp b/src/caffe/layers/mkl_relu_layer.cpp
index fd4120f0b..7e5e04f63 100644
--- a/src/caffe/layers/mkl_relu_layer.cpp
+++ b/src/caffe/layers/mkl_relu_layer.cpp
@@ -61,15 +61,23 @@ void MKLReLULayer<Dtype>::Init(
   }
 
   // Names are for debugging only
-  this->fwd_bottom_data_->name = "fwd_bottom_data @ " + this->layer_param_.name();
-  this->fwd_top_data_->name = "fwd_top_data @ " + this->layer_param_.name();
-  this->bwd_bottom_diff_->name = "bwd_bottom_diff @ " + this->layer_param_.name();
-  this->bwd_top_diff_->name = "bwd_top_diff @ " + this->layer_param_.name();
-
-  this->fwd_bottom_data_->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]), false);
-  this->fwd_top_data_   ->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]), false);
-  this->bwd_bottom_diff_->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]), false);
-  this->bwd_top_diff_   ->create_user_layout(dim, &(this->sizes_[0]), &(this->strides_[0]), false);
+  this->fwd_bottom_data_->name = "fwd_bottom_data @ " +
+      this->layer_param_.name();
+  this->fwd_top_data_->name = "fwd_top_data @ " +
+      this->layer_param_.name();
+  this->bwd_bottom_diff_->name = "bwd_bottom_diff @ " +
+      this->layer_param_.name();
+  this->bwd_top_diff_->name = "bwd_top_diff @ " +
+      this->layer_param_.name();
+
+  this->fwd_bottom_data_->create_user_layout(dim, &(this->sizes_[0]),
+      &(this->strides_[0]), false);
+  this->fwd_top_data_ ->create_user_layout(dim, &(this->sizes_[0]),
+      &(this->strides_[0]), false);
+  this->bwd_bottom_diff_->create_user_layout(dim, &(this->sizes_[0]),
+      &(this->strides_[0]), false);
+  this->bwd_top_diff_ ->create_user_layout(dim, &(this->sizes_[0]),
+      &(this->strides_[0]), false);
 
   // "Lazy" allocation because here we don't know
   // what layout is used by neighbours.
@@ -81,13 +89,13 @@ template <typename Dtype>
 void MKLReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
 // CHECK_EQ(top[0]->shape(), bottom[0]->shape());
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
 template <typename Dtype>
 void MKLReLULayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-  NeuronLayer<Dtype>::Reshape(bottom,top);
+  NeuronLayer<Dtype>::Reshape(bottom, top);
 
   // Here I check for sizes whether to destroy primitives
   size_t dim = bottom[0]->shape().size();
diff --git a/src/caffe/mkl_memory.cpp b/src/caffe/mkl_memory.cpp
index 2ae21217c..ab44c9821 100644
--- a/src/caffe/mkl_memory.cpp
+++ b/src/caffe/mkl_memory.cpp
@@ -136,11 +136,9 @@ void MKLMemoryDescriptorBase<Dtype>::remove_internal_layout() {
 
     // with layout invalidated we should also remove Allocation
     // as next layout may declare different sizes of allocation
-    if(this->internal_ptr) {
-      status = dnnReleaseBuffer<Dtype>(this->internal_ptr);
-      this->internal_ptr = NULL;
-      CHECK_EQ(status, E_SUCCESS);
-    }
+    status = dnnReleaseBuffer<Dtype>(this->internal_ptr);
+    this->internal_ptr = NULL;
+    CHECK_EQ(status, E_SUCCESS);
   }
 }
 
@@ -155,11 +153,9 @@ void MKLMemoryDescriptorBase<Dtype>::remove_user_layout() {
 
     // with layout invalidated we should also remove Allocation
    // as next layout may declare different sizes of allocation
-    if(this->internal_ptr) {
-      status = dnnReleaseBuffer<Dtype>(this->internal_ptr);
-      this->internal_ptr = NULL;
-      CHECK_EQ(status, E_SUCCESS);
-    }
+    status = dnnReleaseBuffer<Dtype>(this->internal_ptr);
+    this->internal_ptr = NULL;
+    CHECK_EQ(status, E_SUCCESS);
   }
 }

From dec74b61c355f6cc5a8e213f9b8b80b9a5314f86 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Thu, 20 Oct 2016 12:46:02 +0200
Subject: [PATCH 06/12] - Fixes to MKL concat layer

---
 src/caffe/layers/mkl_concat_layer.cpp | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/caffe/layers/mkl_concat_layer.cpp b/src/caffe/layers/mkl_concat_layer.cpp
index 787841879..8762c67f9 100644
--- a/src/caffe/layers/mkl_concat_layer.cpp
+++ b/src/caffe/layers/mkl_concat_layer.cpp
@@ -105,6 +105,9 @@ void MKLConcatLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void MKLConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
+  num_ = 0;
+  height_ = 0;
+  width_ = 0;
   Init(bottom,top);
 }
 
@@ -133,10 +136,8 @@ void MKLConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype> *>& bottom,
   vector<void*> bottom_data;
   bool isFirstPass = (concatFwd_ == NULL);
   dnnLayout_t *layouts = NULL;
-  bool *isBottomDataFilled = NULL;
   if (isFirstPass) {
     layouts = new dnnLayout_t[num_concats_];
-    isBottomDataFilled = new bool[num_concats_]();
   }
 
   for (size_t n = 0; n < num_concats_; n++) {
@@ -148,7 +149,6 @@ void MKLConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype> *>& bottom,
         reinterpret_cast<void *>(const_cast<Dtype*>(bottom[n]->cpu_data()));
       if (isFirstPass) {
         layouts[n] = fwd_bottom_data_[n]->layout_usr;
-        isBottomDataFilled[n] = true;
       }
     } else if (isFirstPass) {
       CHECK((bottom[n]->get_prv_data_descriptor())->get_descr_type() ==
@@ -175,17 +175,12 @@ void MKLConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype> *>& bottom,
     CHECK_EQ(e, E_SUCCESS);
 
     for (size_t n = 0; n < num_concats_; ++n) {
-      if (isBottomDataFilled[n]) continue;
-
-      fwd_bottom_data_[n]->create_internal_layout(concatFwd_,
-          (dnnResourceType_t)(dnnResourceMultipleSrc + n));
       bwd_bottom_diff_[n]->create_internal_layout(concatBwd_,
           (dnnResourceType_t)(dnnResourceMultipleDst + n));
     }
   }
 
   delete[] layouts;
-  delete[] isBottomDataFilled;
 
   void *concat_res[dnnResourceNumber];
   for (int n = 0; n < num_concats_; ++n) {

From 88a2e049b9df0c2f596b9cbdf782f98672f67e3b Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Thu, 20 Oct 2016 13:14:35 +0200
Subject: [PATCH 07/12] - Lint fixes

---
 src/caffe/layers/inner_product_layer.cpp   |  2 +-
 src/caffe/layers/mkl_batch_norm_layer.cpp  | 19 ++++++++-------
 src/caffe/layers/mkl_concat_layer.cpp      |  1 -
 src/caffe/layers/mkl_convolution_layer.cpp |  6 ++---
 src/caffe/layers/mkl_lrn_layer.cpp         |  6 ++---
 src/caffe/layers/mkl_pooling_layer.cpp     | 19 +++++++--------
 src/caffe/layers/mkl_relu_layer.cpp        | 19 ++++++++-------
 src/caffe/layers/mkl_split_layer.cpp       | 27 ++++++++++++++--------
 8 files changed, 54 insertions(+), 45 deletions(-)

diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp
index e5c21f34a..8e6dff22c 100644
--- a/src/caffe/layers/inner_product_layer.cpp
+++ b/src/caffe/layers/inner_product_layer.cpp
@@ -183,6 +183,6 @@ STUB_GPU(InnerProductLayer);
 
 INSTANTIATE_CLASS(InnerProductLayer);
-//REGISTER_LAYER_CLASS(InnerProduct);
+// REGISTER_LAYER_CLASS(InnerProduct);
 
 }  // namespace caffe
diff --git a/src/caffe/layers/mkl_batch_norm_layer.cpp b/src/caffe/layers/mkl_batch_norm_layer.cpp
index f0a6a8801..9e3189f63 100644
--- a/src/caffe/layers/mkl_batch_norm_layer.cpp
+++ b/src/caffe/layers/mkl_batch_norm_layer.cpp
@@ -93,7 +93,8 @@ void MKLBatchNormLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
   bwd_bottom_diff->name = "bwd_bottom_diff @ " + this->layer_param_.name();
   bwd_top_diff->name = "bwd_top_diff @ " + this->layer_param_.name();
 
-  // TODO: Make a cleanup routine to avoid copy of following code in the Destructor
+  // TODO: Make a cleanup routine to avoid
+  // copy of following code in the Destructor
 
   dnnError_t e;
   dnnLayoutDelete<Dtype>(layout_usr_);
@@ -153,15 +154,14 @@ void MKLBatchNormLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void MKLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
 template <typename Dtype>
 void MKLBatchNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-
   bool reshaping = true;
-  if((num_ == bottom[0]->num()) &&
+  if ((num_ == bottom[0]->num()) &&
       channels_ == bottom[0]->channels() &&
       height_ == bottom[0]->height() &&
       width_ == bottom[0]->width()) {
@@ -179,7 +179,7 @@ void MKLBatchNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
   }
 
   if (reshaping == true) {
-    Init(bottom,top);
+    Init(bottom, top);
   }
 }
 
@@ -323,13 +323,16 @@ void MKLBatchNormLayer<Dtype>::Backward_cpu(
     const vector<Blob<Dtype>*>& bottom) {
   void *bottom_data = NULL;
   if (bottom[0] == top[0]) {
-    bottom_data = reinterpret_cast<void *>(const_cast<Dtype*>(temp_.cpu_data()));
+    bottom_data = reinterpret_cast<void *>(
+        const_cast<Dtype*>(temp_.cpu_data()));
   } else {
     bottom_data =
-      reinterpret_cast<void *>(const_cast<Dtype*>(bottom[0]->prv_data()));
+      reinterpret_cast<void *>(
+          const_cast<Dtype*>(bottom[0]->prv_data()));
     if (NULL == bottom_data)
       bottom_data =
-        reinterpret_cast<void *>(const_cast<Dtype*>(bottom[0]->cpu_data()));
+        reinterpret_cast<void *>(
+            const_cast<Dtype*>(bottom[0]->cpu_data()));
   }
 
   dnnError_t e;
diff --git a/src/caffe/layers/mkl_concat_layer.cpp b/src/caffe/layers/mkl_concat_layer.cpp
index 8762c67f9..a16d76cc9 100644
--- a/src/caffe/layers/mkl_concat_layer.cpp
+++ b/src/caffe/layers/mkl_concat_layer.cpp
@@ -51,7 +51,6 @@ template <typename Dtype> MKLConcatLayer<Dtype>::~MKLConcatLayer() {
 template <typename Dtype>
 void MKLConcatLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-
   size_t dim_src = bottom[0]->shape().size();
   size_t dim_dst = dim_src;
 
diff --git a/src/caffe/layers/mkl_convolution_layer.cpp b/src/caffe/layers/mkl_convolution_layer.cpp
index 3f76539da..98daab326 100644
--- a/src/caffe/layers/mkl_convolution_layer.cpp
+++ b/src/caffe/layers/mkl_convolution_layer.cpp
@@ -317,17 +317,15 @@
     bwdb_bias_diff_iter->create_layouts(convolutionBwdBias,
         dnnResourceDiffBias, 1, bias_sizes, bias_strides);
   }
-
 }
-
 template <typename Dtype>
 void MKLConvolutionLayer<Dtype>::LayerSetUp(
     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
   ConvolutionLayer<Dtype>::LayerSetUp(bottom, top);
 
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
 template <typename Dtype>
@@ -341,7 +339,7 @@ void MKLConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
       this->num_ == bottom[0]->num())
     return;
 
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/mkl_lrn_layer.cpp b/src/caffe/layers/mkl_lrn_layer.cpp
index 002f48a80..db7e361cb 100644
--- a/src/caffe/layers/mkl_lrn_layer.cpp
+++ b/src/caffe/layers/mkl_lrn_layer.cpp
@@ -100,7 +100,7 @@ void MKLLRNLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void MKLLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
 template <typename Dtype>
@@ -110,7 +110,7 @@ void MKLLRNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     << "corresponding to (num, channels, height, width)";
 
   bool reshaping = true;
-  if((num_ == bottom[0]->num()) &&
+  if ((num_ == bottom[0]->num()) &&
       channels_ == bottom[0]->channels() &&
       height_ == bottom[0]->height() &&
       width_ == bottom[0]->width()) {
@@ -133,7 +133,7 @@ void MKLLRNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
   }
 
   if (reshaping == true) {
-    Init(bottom,top);
+    Init(bottom, top);
   }
 }
 
diff --git a/src/caffe/layers/mkl_pooling_layer.cpp b/src/caffe/layers/mkl_pooling_layer.cpp
index a49a3a111..26592b2d5 100644
--- a/src/caffe/layers/mkl_pooling_layer.cpp
+++ b/src/caffe/layers/mkl_pooling_layer.cpp
@@ -57,8 +57,7 @@ MKLPoolingLayer<Dtype>::~MKLPoolingLayer() {
 template <typename Dtype>
 void MKLPoolingLayer<Dtype>::Init(
     const vector<Blob<Dtype>*>& bottom,
-    const vector<Blob<Dtype>*>& top)
-{
+    const vector<Blob<Dtype>*>& top) {
   PoolingParameter pool_param = this->layer_param_.pooling_param();
 
   channels_ = bottom[0]->channels();
@@ -200,10 +199,10 @@ void MKLPoolingLayer<Dtype>::Init(
   bwd_top_diff->name = "bwd_top_diff @ " + this->layer_param_.name();
   bwd_bottom_diff->name = "bwd_bottom_diff @ " + this->layer_param_.name();
 
-  fwd_bottom_data->create_user_layout(dim, src_sizes, src_strides,false);
-  fwd_top_data   ->create_user_layout(dim, dst_sizes, dst_strides,false);
-  bwd_bottom_diff->create_user_layout(dim, src_sizes, src_strides,false);
-  bwd_top_diff   ->create_user_layout(dim, dst_sizes, dst_strides,false);
+  fwd_bottom_data->create_user_layout(dim, src_sizes, src_strides, false);
+  fwd_top_data   ->create_user_layout(dim, dst_sizes, dst_strides, false);
+  bwd_bottom_diff->create_user_layout(dim, src_sizes, src_strides, false);
+  bwd_top_diff   ->create_user_layout(dim, dst_sizes, dst_strides, false);
   // Primitives will be allocated during the first fwd pass
   dnnDelete<Dtype>(poolingFwd);
   dnnDelete<Dtype>(poolingBwd);
@@ -211,9 +210,8 @@ void MKLPoolingLayer<Dtype>::Init(
 
 template <typename Dtype>
 void MKLPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
-    const vector<Blob<Dtype>*>& top)
-{
-  Init(bottom,top);
+    const vector<Blob<Dtype>*>& top) {
+  Init(bottom, top);
 }
 
 template <typename Dtype>
@@ -229,8 +227,7 @@ void MKLPoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     return;
   }
 
-  Init(bottom,top);
-
+  Init(bottom, top);
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/mkl_relu_layer.cpp b/src/caffe/layers/mkl_relu_layer.cpp
index 7e5e04f63..71d811f81 100644
--- a/src/caffe/layers/mkl_relu_layer.cpp
+++ b/src/caffe/layers/mkl_relu_layer.cpp
@@ -81,15 +81,15 @@ void MKLReLULayer<Dtype>::Init(
 
   // "Lazy" allocation because here we don't know
   // what layout is used by neighbours.
diff --git a/src/caffe/layers/mkl_relu_layer.cpp b/src/caffe/layers/mkl_relu_layer.cpp
index 7e5e04f63..71d811f81 100644
--- a/src/caffe/layers/mkl_relu_layer.cpp
+++ b/src/caffe/layers/mkl_relu_layer.cpp
@@ -81,15 +81,15 @@ void MKLReLULayer<Dtype>::Init(
 
   // "Lazy" allocation because here we don't know
   // what layout is used by neighbours.
-  dnnDelete(reluFwd_);  // Will be allocated in a "lazy" way in first forward pass
-  dnnDelete(reluBwd_);  // Will be allocated in a "lazy" way in first backward pass
+  dnnDelete(reluFwd_);
+  dnnDelete(reluBwd_);
 }
 
 template <typename Dtype>
 void MKLReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   // CHECK_EQ(top[0]->shape(), bottom[0]->shape());
-    Init(bottom, top);
+  Init(bottom, top);
 }
 
 template <typename Dtype>
@@ -103,23 +103,26 @@ void MKLReLULayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
 
   // If dimensions of blobs are the same as they were then
   // do not really destroy primitives
   if (dim == this->sizes_.size()) {
-    //.. check for strides and size dims if they corresspond each other
+    // .. check if strides and size dims correspond to each other
     // TODO: speedup comparison?
     bool is_match = true;
    for (size_t d = 0; d < dim; ++d) {
-      is_match = is_match && (this->sizes_[d] == bottom[0]->shape()[dim - 1 - d]);
-      is_match = is_match && (this->strides_[d] == ((d == 0) ? 1 : this->strides_[d-1]*this->sizes_[d-1]));
+      is_match = is_match && (this->sizes_[d] ==
+                              bottom[0]->shape()[dim - 1 - d]);
+      is_match = is_match && (this->strides_[d] == ((d == 0) ? 1 :
+                              this->strides_[d-1]*this->sizes_[d-1]));
     }
 
     // If no new modification was done to layout sizes,
-    // strides realtivly to previous iteration then no primitives recreation is needed
+    // strides relative to the previous iteration then
+    // no primitive recreation is needed
     if (is_match) {
       return;
     }
   }
 
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
diff --git a/src/caffe/layers/mkl_split_layer.cpp b/src/caffe/layers/mkl_split_layer.cpp
index 28197fb7e..7a568a129 100644
--- a/src/caffe/layers/mkl_split_layer.cpp
+++ b/src/caffe/layers/mkl_split_layer.cpp
@@ -58,18 +58,25 @@ void MKLSplitLayer<Dtype>::Init(
   this->strides_src_.resize(dim_src);
   for (size_t d = 0; d < dim_src; ++d) {
     this->sizes_src_[d] = bottom[0]->shape()[dim_src - d - 1];
-    this->strides_src_[d] = (d == 0) ? 1 : this->strides_src_[d-1]*this->sizes_src_[d-1];
+    this->strides_src_[d] = (d == 0) ?
+            1 : this->strides_src_[d-1]*this->sizes_src_[d-1];
   }
 
   for (size_t i = 0; i < num_tops; ++i) {
     bwd_top_diff.push_back(shared_ptr<MKLDiff<Dtype> >(new MKLDiff<Dtype>));
-    bwd_top_diff[i]->create_user_layout(dim_src, &(this->sizes_src_[0]), &(this->strides_src_[0]),false);
+    bwd_top_diff[i]->create_user_layout(dim_src,
+                                        &(this->sizes_src_[0]),
+                                        &(this->strides_src_[0]),
+                                        false);
   }
 
   // Blob-wise coefficients for the elementwise operation.
   coeffs_ = vector<Dtype>(top.size(), 1);
 
-  bwd_bottom_diff->create_user_layout(dim_src, &(this->sizes_src_[0]), &(this->strides_src_[0]),false);
+  bwd_bottom_diff->create_user_layout(dim_src,
+                                      &(this->sizes_src_[0]),
+                                      &(this->strides_src_[0]),
+                                      false);
 
   // Primitive will be created at first time it is to be used
   dnnDelete(sumPrimitive);
@@ -78,7 +85,7 @@ void MKLSplitLayer<Dtype>::Init(
 template <typename Dtype>
 void MKLSplitLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
 template <typename Dtype>
@@ -104,24 +111,26 @@ void MKLSplitLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
 
   // If dimensions of blobs are the same as they were then
   // do not really destroy primitives
   if (dim_src == this->sizes_src_.size()) {
-    //.. check for strides and size dims if they corresspond each other
+    // .. check if strides and size dims correspond to each other
     // TODO: speedup comparison?
     bool is_match = true;
     for (size_t d = 0; d < dim_src; ++d) {
-      is_match = is_match && (this->sizes_src_[d] == bottom[0]->shape()[dim_src - 1 - d]);
-      is_match = is_match && (this->strides_src_[d] == ((d == 0) ? 1 :
+      is_match = is_match && (this->sizes_src_[d] ==
+                              bottom[0]->shape()[dim_src - 1 - d]);
+      is_match = is_match && (this->strides_src_[d] == ((d == 0) ? 1 :
                               this->strides_src_[d-1]*this->sizes_src_[d-1]));
     }
 
     // If no new modification was done to layout sizes,
-    // strides realtivly to previous iteration then no primitives recreation is needed
+    // strides relative to the previous iteration then
+    // no primitive recreation is needed
     if (is_match) {
       return;
     }
   }
 
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
 template <typename Dtype>
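The size/stride comparison that this patch reflows in the ReLU and Split Reshape methods encodes MKL's dimension order: sizes_[d] holds the d-th innermost dimension (width first for NCHW), and a dense layout must satisfy strides_[d] = strides_[d-1] * sizes_[d-1]. The standalone illustration below, with worked numbers, is not code from the patch, only a restatement of the check:

#include <cstddef>
#include <vector>

// True when an MKL layout (sizes/strides, innermost dimension first) still
// describes the given Caffe blob shape (outermost dimension first).
static bool LayoutMatches(const std::vector<size_t>& sizes,
                          const std::vector<size_t>& strides,
                          const std::vector<int>& blob_shape) {
  const size_t dim = blob_shape.size();
  if (sizes.size() != dim) return false;  // rank changed: rebuild primitives
  bool is_match = true;
  for (size_t d = 0; d < dim; ++d) {
    // MKL stores dims innermost-first, Caffe shape() outermost-first.
    is_match = is_match &&
        (sizes[d] == static_cast<size_t>(blob_shape[dim - 1 - d]));
    // A dense layout has strides 1, s0, s0*s1, ...
    is_match = is_match &&
        (strides[d] == ((d == 0) ? 1 : strides[d - 1] * sizes[d - 1]));
  }
  return is_match;
}

// For a blob of shape (32, 3, 227, 227): sizes   = {227, 227, 3, 32},
//                                        strides = {1, 227, 51529, 154587}.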
From c6ae61d7dee49492eee0f5fd2a4c604bdf2a79ab Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Fri, 21 Oct 2016 21:13:01 +0200
Subject: [PATCH 08/12] - Added unit tests for the Forward-Reshape-Forward
 scenario

---
 src/caffe/test/test_net.cpp | 117 +++++++++++++++++++++++++++++++++++-
 1 file changed, 115 insertions(+), 2 deletions(-)

diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 48559cf86..0bc09ac4c 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -2411,8 +2411,6 @@ TEST_F(FilterNetTest, TestFilterInOutByExcludeMultiRule) {
   this->RunFilterNetTest(input_proto_test, output_proto_test);
 }
 
-
-
 TYPED_TEST(NetTest, TestReshape) {
   typedef typename TypeParam::Dtype Dtype;
   // We set up bottom blobs of two different sizes, switch between
@@ -2485,6 +2483,121 @@ TYPED_TEST(NetTest, TestReshape) {
   EXPECT_FALSE(same_spatial_shape);
 }
 
+
+// This test is just checking if this
+// configuration does not explode
+TYPED_TEST(NetTest, TestForwardReshapeForward) {
+  typedef typename TypeParam::Dtype Dtype;
+  const string& proto =
+      "name: 'TestNetwork' "
+      " layer {"
+      "  top: 'data'"
+      "  top: 'label'"
+      "  name: 'data'"
+      "  type: 'DummyData'"
+      "  dummy_data_param {"
+      "   shape: { dim: 32 dim: 3 dim: 227 dim: 227 }"
+      "   data_filler {"
+      "    type: 'constant'"
+      "    value: 0.01"
+      "   }"
+      "  }"
+      "  transform_param {"
+      "   mirror: true"
+      "   crop_size: 224"
+      "   mean_value: 104"
+      "   mean_value: 117"
+      "   mean_value: 123"
+      "  }"
+      " }"
+      " layer {"
+      "  bottom: 'data'"
+      "  top: 'conv'"
+      "  name: 'conv1'"
+      "  type: 'Convolution'"
+      "  param {"
+      "   lr_mult: 1"
+      "   decay_mult: 1"
+      "  }"
+      "  convolution_param {"
+      "   "
+      "   num_output: 64"
+      "   pad: 3"
+      "   kernel_size: 7"
+      "   stride: 2"
+      "   weight_filler {"
+      "    type: 'xavier'"
+      "   }"
+      "   bias_term: false"
+      "  }"
+      " }"
+      " layer {"
+      "  bottom: 'conv'"
+      "  top: 'relu1'"
+      "  name: 'relu1'"
+      "  type: 'ReLU'"
+      "  relu_param {"
+      "   "
+      "  }"
+      " }"
+      " layer {"
+      "  bottom: 'conv'"
+      "  top: 'relu2'"
+      "  name: 'relu2'"
+      "  type: 'ReLU'"
+      "  relu_param {"
+      "   "
+      "  }"
+      " }"
+      " layer {"
+      "  bottom: 'relu1'"
+      "  bottom: 'relu2'"
+      "  top: 'concat'"
+      "  name: 'concat'"
+      "  type: 'Concat'"
+      "  concat_param {"
+      "   "
+      "  }"
+      " } "
+      " layer {"
+      "  bottom: 'concat'"
+      "  top: 'lrn'"
+      "  name: 'LRN'"
+      "  type: 'LRN'"
+      "  lrn_param {"
+      "   local_size: 5"
+      "   alpha: 0.0001"
+      "   beta: 0.75"
+      "  }"
+      " }"
+      " layer {"
+      "  bottom: 'lrn'"
+      "  top: 'pooling'"
+      "  name: 'Pooling'"
+      "  type: 'Pooling'"
+      "  pooling_param {"
+      "   kernel_size: 5"
+      "   stride: 2"
+      "   pool: MAX"
+      "  }"
+      " }"
+      " layer {"
+      "  bottom: 'pooling'"
+      "  top: 'bn'"
+      "  name: 'BatchNorm'"
+      "  type: 'BatchNorm'"
+      "  batch_norm_param {"
+      "  }"
+      " }";
+  this->InitNetFromProtoString(proto);
+  this->net_->Forward();
+  shared_ptr<Blob<Dtype> > input_blob = this->net_->blob_by_name("data");
+  input_blob->Reshape(1, 3, 1280, 720);
+  this->net_->Forward();
+}
+
+
 
 TYPED_TEST(NetTest, TestSkipPropagateDown) {
   // check bottom_need_backward if propagate_down is true
   this->InitSkipPropNet(false);
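The new test drives exactly the sequence the earlier layer fixes target: build primitives for one input shape, reshape the input blob, and run forward again. Outside the test harness the same scenario looks roughly like this against the public Net API; the prototxt path here is hypothetical.

#include "caffe/net.hpp"

void ForwardReshapeForward() {
  // "example_net.prototxt" is a placeholder model definition.
  caffe::Net<float> net("example_net.prototxt", caffe::TEST);
  net.Forward();  // MKL layers create primitives for the original input shape
  net.blob_by_name("data")->Reshape(1, 3, 1280, 720);
  net.Forward();  // every layer must rebuild its layouts instead of crashing
}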
From f076d3ab4dc97dec184982404cc6be2d4e4cecea Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Tue, 25 Oct 2016 10:40:39 +0200
Subject: [PATCH 09/12] - Fix to blob reshape

Desc:
When a blob is being reshaped, it has to be reinitialized in terms of
its sync state
---
 src/caffe/blob.cpp                     | 11 ++++++++---
 src/caffe/layers/mkl_concat_layer.cpp  | 21 +++++++++++++--------
 src/caffe/layers/mkl_eltwise_layer.cpp | 13 +++++++++----
 src/caffe/layers/mkldnn_lrn_layer.cpp  |  4 ++--
 4 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp
index cb249d21c..6adafb679 100644
--- a/src/caffe/blob.cpp
+++ b/src/caffe/blob.cpp
@@ -68,19 +68,24 @@ void Blob<Dtype>::Reshape(const vector<int>& shape) {
   }
   int* shape_data = static_cast<int*>(shape_data_->mutable_cpu_data());
 #endif
-
+  bool actual_reshaping = false;
   for (int i = 0; i < shape.size(); ++i) {
     CHECK_GE(shape[i], 0);
     if (count_ != 0) {
       CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX";
     }
     count_ *= shape[i];
-    shape_[i] = shape[i];
+    if (shape_[i] != shape[i]) {
+      actual_reshaping = true;
+      shape_[i] = shape[i];
+    }
 #ifndef CPU_ONLY
     shape_data[i] = shape[i];
 #endif
   }
-  if (count_ > capacity_) {
+  // We restart sync objects when there was a change of shape or the
+  // requested count is bigger than the current capacity
+  if ( (actual_reshaping == true) || (count_ > capacity_) ) {
     capacity_ = count_;
     data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
     diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
diff --git a/src/caffe/layers/mkl_concat_layer.cpp b/src/caffe/layers/mkl_concat_layer.cpp
index a16d76cc9..d9ac118a7 100644
--- a/src/caffe/layers/mkl_concat_layer.cpp
+++ b/src/caffe/layers/mkl_concat_layer.cpp
@@ -81,8 +81,14 @@ void MKLConcatLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
     split_channels_[i] = bottom[i]->channels();
     channels_ += split_channels_[i];
 
-    fwd_bottom_data_[i]->create_user_layout(dim_src, sizes_src, strides_src, false);
-    bwd_bottom_diff_[i]->create_user_layout(dim_src, sizes_src, strides_src, false);
+    fwd_bottom_data_[i]->create_user_layout(dim_src,
+                                            sizes_src,
+                                            strides_src,
+                                            false);
+    bwd_bottom_diff_[i]->create_user_layout(dim_src,
+                                            sizes_src,
+                                            strides_src,
+                                            false);
   }
 
   // XXX: almost the same computations as above for src
@@ -113,11 +119,10 @@ void MKLConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void MKLConcatLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-
-  if((num_ == bottom[0]->num()) &&
-     height_ == bottom[0]->height() &&
-     width_ == bottom[0]->width()) {
-      top[0]->Reshape(num_, channels_, height_, width_);
+  if ((num_ == bottom[0]->num()) &&
+      height_ == bottom[0]->height() &&
+      width_ == bottom[0]->width()) {
+    top[0]->Reshape(num_, channels_, height_, width_);
     return;
   }
 
@@ -125,7 +130,7 @@ void MKLConcatLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
   height_ = bottom[0]->height();
   width_ = bottom[0]->width();
   top[0]->Reshape(num_, channels_, height_, width_);
-  Init(bottom,top);
+  Init(bottom, top);
 }
 
diff --git a/src/caffe/layers/mkl_eltwise_layer.cpp b/src/caffe/layers/mkl_eltwise_layer.cpp
index d6d1eb73a..83e60a0ad 100644
--- a/src/caffe/layers/mkl_eltwise_layer.cpp
+++ b/src/caffe/layers/mkl_eltwise_layer.cpp
@@ -50,9 +50,8 @@ MKLEltwiseLayer<Dtype>::~MKLEltwiseLayer() {
 }
 
 template <typename Dtype>
-void MKLEltwiseLayer<Dtype>::Init( const vector<Blob<Dtype>*>& bottom,
+void MKLEltwiseLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-
   channels_ = bottom[0]->channels();
   height_ = bottom[0]->height();
   width_ = bottom[0]->width();
@@ -82,8 +81,14 @@ void MKLEltwiseLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
     bwd_bottom_diff.push_back(
         shared_ptr<MKLDiff<Dtype> >(new MKLDiff<Dtype>));
     CHECK_EQ(dim_src, bottom[i]->shape().size());
-    fwd_bottom_data[i]->create_user_layout(dim_src, sizes_src, strides_src,false);
-    bwd_bottom_diff[i]->create_user_layout(dim_src, sizes_src, strides_src,false);
+    fwd_bottom_data[i]->create_user_layout(dim_src,
+                                           sizes_src,
+                                           strides_src,
+                                           false);
+    bwd_bottom_diff[i]->create_user_layout(dim_src,
+                                           sizes_src,
+                                           strides_src,
+                                           false);
   }
 
   fwd_top_data->create_user_layout(dim_src, sizes_src, strides_src,false);
diff --git a/src/caffe/layers/mkldnn_lrn_layer.cpp b/src/caffe/layers/mkldnn_lrn_layer.cpp
index ede8cde1f..25b37bfd0 100644
--- a/src/caffe/layers/mkldnn_lrn_layer.cpp
+++ b/src/caffe/layers/mkldnn_lrn_layer.cpp
@@ -45,8 +45,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 namespace caffe {
 
 template <typename Dtype>
-void MKLDNNLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom
-    ,const vector<Blob<Dtype>*>& top)
+void MKLDNNLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top)
 {
   VLOG(1) << "MKLDNNLRNLayer<Dtype>::LayerSetUp: " << this->layer_param_.name();
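The blob change above is the subtle one: previously Reshape kept the existing SyncedMemory whenever the new count fit the old capacity, so a shape change that preserves the element count reused memory whose private-layout sync state still described the old shape. A standalone sketch of the scenario the fix covers (illustrative, not code from the patch):

#include "caffe/blob.hpp"

void SameCountReshape() {
  caffe::Blob<float> b(1, 3, 1280, 720);  // 2764800 elements
  // ... a forward pass may leave b's data in an MKL private layout ...
  b.Reshape(1, 3, 720, 1280);  // different shape, identical count
  // With this patch, data_ and diff_ are recreated on any real shape
  // change, so no stale private-layout state survives the reshape.
  const float* p = b.cpu_data();
  (void)p;
}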
From 85fa72c66af37eaba795541de9e4eff2659970ac Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Tue, 25 Oct 2016 15:21:01 +0200
Subject: [PATCH 10/12] - Fix to memory leak

---
 include/caffe/layers/mkl_layers.hpp   | 4 ++--
 src/caffe/layers/mkl_concat_layer.cpp | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/caffe/layers/mkl_layers.hpp b/include/caffe/layers/mkl_layers.hpp
index 52c22c3f8..361b4b1b4 100644
--- a/include/caffe/layers/mkl_layers.hpp
+++ b/include/caffe/layers/mkl_layers.hpp
@@ -315,8 +315,8 @@ class MKLConcatLayer : public Layer<Dtype> {
         concatFwd_(static_cast<dnnPrimitive_t>(NULL)),
         concatBwd_(static_cast<dnnPrimitive_t>(NULL)),
         fwd_top_data_    (new MKLData<Dtype>()),
-        bwd_top_diff_    (new MKLDiff<Dtype>()) {
-  }
+        bwd_top_diff_    (new MKLDiff<Dtype>()),
+        split_channels_  (NULL) { }
   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
diff --git a/src/caffe/layers/mkl_concat_layer.cpp b/src/caffe/layers/mkl_concat_layer.cpp
index d9ac118a7..e2929ca84 100644
--- a/src/caffe/layers/mkl_concat_layer.cpp
+++ b/src/caffe/layers/mkl_concat_layer.cpp
@@ -46,6 +46,7 @@ namespace caffe {
 template <typename Dtype> MKLConcatLayer<Dtype>::~MKLConcatLayer() {
   dnnDelete(concatFwd_);
   dnnDelete(concatBwd_);
+  delete[] split_channels_;
 }
 
 template <typename Dtype>
@@ -63,6 +64,8 @@ void MKLConcatLayer<Dtype>::Init(const vector<Blob<Dtype>*>& bottom,
     CHECK_EQ(bottom[0]->width(), bottom[i]->width());
   }
 
+
+  delete[] split_channels_;
   split_channels_ = new size_t[num_concats_];
   for (size_t i = 0; i < num_concats_; ++i) {
     CHECK_EQ(dim_src, bottom[i]->shape().size());

From 8d03730b44dc708e082c08ba5bfe6d7d03f237e8 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Wed, 26 Oct 2016 10:49:01 +0200
Subject: [PATCH 11/12] - Fix for rectangular MKL pooling scenario

---
 src/caffe/layers/mkl_pooling_layer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/caffe/layers/mkl_pooling_layer.cpp b/src/caffe/layers/mkl_pooling_layer.cpp
index 26592b2d5..55a1f92e6 100644
--- a/src/caffe/layers/mkl_pooling_layer.cpp
+++ b/src/caffe/layers/mkl_pooling_layer.cpp
@@ -135,11 +135,11 @@ void MKLPoolingLayer<Dtype>::Init(
     if ((pooled_height_ - 1) * stride_h_ >= bottom[0]->height() + pad_h_) {
       --pooled_height_;
     }
-    if ((pooled_width_ - 1) * stride_w_ >= bottom[0]->height() + pad_w_) {
+    if ((pooled_width_ - 1) * stride_w_ >= bottom[0]->width() + pad_w_) {
       --pooled_width_;
     }
     CHECK_LT((pooled_height_ - 1) * stride_h_, bottom[0]->height() + pad_h_);
-    CHECK_LT((pooled_width_ - 1) * stride_w_, bottom[0]->height() + pad_w_);
+    CHECK_LT((pooled_width_ - 1) * stride_w_, bottom[0]->width() + pad_w_);
   }
 
   top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
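To see what the height/width mix-up fixed above did, take a padded max-pool (kernel 3, stride 2, pad 1) over a 720x1280 (HxW) input, the kind of rectangular shape the reshape test switches to. The code below checks the two clamp conditions with concrete numbers; PooledDim mirrors Caffe's output-size formula, and the whole snippet is illustrative, not code from the patch:

#include <cassert>
#include <cmath>

int PooledDim(int in, int kernel, int stride, int pad) {
  return static_cast<int>(std::ceil(
      (in + 2 * pad - kernel) / static_cast<float>(stride))) + 1;
}

void RectangularPoolingExample() {
  const int h = 720, w = 1280, k = 3, s = 2, p = 1;
  int pooled_h = PooledDim(h, k, s, p);  // 361
  int pooled_w = PooledDim(w, k, s, p);  // 641
  // The old code compared the pooled width against the *height*:
  assert((pooled_w - 1) * s >= h + p);     // 1280 >= 721: wrongly shrinks
                                           // pooled_w, then the height-based
                                           // CHECK_LT aborts
  // The fixed code compares it against the width:
  assert(!((pooled_w - 1) * s >= w + p));  // 1280 >= 1281 is false: pooled_w
                                           // stays 641 and CHECK_LT passes
  (void)pooled_h;
}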
From 8ddb521db44f6b223ec83743729d67208bebcf3b Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Fri, 28 Oct 2016 12:40:14 +0200
Subject: [PATCH 12/12] - ForwardReshapeForward ULT was limited to MKL2017
 engine

Desc:
- There is a problem with the ULT for the Caffe engine (likely connected
  to Intel OpenMP)
---
 src/caffe/test/test_net.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 0bc09ac4c..38a3d02d9 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -2483,7 +2483,10 @@ TYPED_TEST(NetTest, TestReshape) {
   EXPECT_FALSE(same_spatial_shape);
 }
 
-
+// TODO: this test should work for the Caffe engine as well,
+// but there were problems visible on Intel OpenMP
+// that need to be investigated
+#ifdef MKL2017_SUPPORTED
 // This test is just checking if this
 // configuration does not explode
 TYPED_TEST(NetTest, TestForwardReshapeForward) {
@@ -2522,6 +2525,7 @@
       "  convolution_param {"
       "   "
       "   num_output: 64"
+      "   engine: MKL2017 "
       "   pad: 3"
       "   kernel_size: 7"
       "   stride: 2"
@@ -2537,6 +2541,7 @@
       "  name: 'relu1'"
       "  type: 'ReLU'"
       "  relu_param {"
+      "   engine: MKL2017 "
       "   "
       "  }"
       " }"
@@ -2546,6 +2551,7 @@
       "  name: 'relu2'"
       "  type: 'ReLU'"
       "  relu_param {"
+      "   engine: MKL2017 "
      "   "
       "  }"
       " }"
@@ -2556,6 +2562,7 @@
      "  name: 'concat'"
      "  type: 'Concat'"
      "  concat_param {"
+      "   engine: MKL2017 "
      "   "
      "  }"
      " } "
@@ -2565,6 +2572,7 @@
      "  name: 'LRN'"
      "  type: 'LRN'"
      "  lrn_param {"
+      "   engine: MKL2017 "
      "   local_size: 5"
      "   alpha: 0.0001"
      "   beta: 0.75"
@@ -2576,6 +2584,7 @@
      "  name: 'Pooling'"
      "  type: 'Pooling'"
      "  pooling_param {"
+      "   engine: MKL2017 "
      "   kernel_size: 5"
      "   stride: 2"
      "   pool: MAX"
@@ -2587,6 +2596,7 @@
      "  name: 'BatchNorm'"
      "  type: 'BatchNorm'"
      "  batch_norm_param {"
+      "   engine: MKL2017 "
      "  }"
      " }";
@@ -2595,6 +2605,7 @@
   this->InitNetFromProtoString(proto);
   this->net_->Forward();
   shared_ptr<Blob<Dtype> > input_blob = this->net_->blob_by_name("data");
   input_blob->Reshape(1, 3, 1280, 720);
   this->net_->Forward();
 }
+#endif